diff options
Diffstat (limited to 'third_party/rust/wgpu-core')
52 files changed, 32442 insertions, 0 deletions
diff --git a/third_party/rust/wgpu-core/.cargo-checksum.json b/third_party/rust/wgpu-core/.cargo-checksum.json new file mode 100644 index 0000000000..bf0a48e81a --- /dev/null +++ b/third_party/rust/wgpu-core/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"92c0bcfb5bf68fb55acb6e7b826ec07c1cfdd6d53b057c16a5c698e044ea228e","LICENSE.APACHE":"a6cba85bc92e0cff7a450b1d873c0eaa2e9fc96bf472df0247a26bec77bf3ff9","LICENSE.MIT":"c7fea58d1cfe49634cd92e54fc10a9d871f4b275321a4cd8c09e449122caaeb4","build.rs":"a99478d7f63fb41429e3834f4d0e5cd333f94ba1834c68295f929170e16987de","src/any_surface.rs":"1c032bc1894a222a47f0116b976f1543c1140c0534678502ee1172d4f77fc515","src/binding_model.rs":"2bd4e4a36742ccf0cab0afa039411a791e2a6e9ea3909d0b85cc9a84cc151c6b","src/command/bind.rs":"a37f042484b65d9fdea4cdab3667381623ee9a8943a6d32683d410b92736d306","src/command/bundle.rs":"91513a3be0adf46a9f3454b6a3d00ff6686729eb91fe9dd6d732cbfa1ff6d1d8","src/command/clear.rs":"b20e93c4b8cb47062b38e472f78d28d9ec00fd1169b17a87094be7f9d1c995e1","src/command/compute.rs":"eb60f0e2842dd20b366905225af24f4ca2a1b0c67914b86009c5b870b26f747f","src/command/draw.rs":"e8a664fc248e273e8c0e4aaeb645010b3f4ec61d29d858137f31f6f653c86542","src/command/memory_init.rs":"6ec93b9e2eb21edaa534e60770b4ba95735e9de61e74d827bc492df8e3639449","src/command/mod.rs":"d6a66a5796bd824be72af2c8d3ece59a507090c61cb50e9856eb4c70a28945e2","src/command/query.rs":"dffc843746b10ba9a50b8a2b92a59b407b56a845619a96d72a5883588fcb50f0","src/command/render.rs":"c3783b4f19b4eafb33f94554aea69408d42e40b5e98da22aa804a0931430ea6f","src/command/transfer.rs":"bf1077d1a99a258bad46087ae7234703627e7f4d30b38e6142d016c02deaad3a","src/conv.rs":"7e3ffe33b47a6fd3617aabf9f11cc68f1ccbee2c7343b8dbbcd0e8f3447e1ad8","src/device/any_device.rs":"65f47b58939b60f88f47861e65d5d45209492df8e73e7c1b60b3b459f510c09e","src/device/bgl.rs":"ec8bdd6e9b4cd50c25bed317275863d0c16bb6619f62ed85bf0464948010dfc1","src/device/global.rs":"7d70a45bd39e251c6945fc475883c4e69632f92a7abe263ada
b6e47a248de5a4","src/device/life.rs":"cd12343d5a14d82b18b787991811b36f420719776336f8a65b45c32fd47a77d4","src/device/mod.rs":"fff41f92e1a9f6660e18dc30452d9911ca827701bb8303af2ae06f1c1e1a795f","src/device/queue.rs":"2ffc477d1bebb35a1fc8e46f4ca2c5ef50a4eb6034968f076062461b2e678699","src/device/resource.rs":"4f22cf27da8d829b624877d7d3bb10971a0e8fb7c4f95d85d5011049a010684a","src/device/trace.rs":"9deb1b083165e07253b4928ac2f564aba06f9089c3aca1c0a1d438d87d981542","src/error.rs":"e3b6b7a69877437f4e46af7f0e8ca1db1822beae7c8448db41c2bae0f64b2bb4","src/global.rs":"0966475959706650fd036a18d51441a8e14c3ef10107db617f597614ca47e50a","src/hal_api.rs":"1cd9c3fe1c9d8c3a24e3e7f963a2ef26e056a2b26d529b840dbc969090aaf201","src/hash_utils.rs":"e8d484027c7ce81978e4679a5e20af9416ab7d2fa595f1ca95992b29d625b0ca","src/hub.rs":"352a1b75d4535f24b06d16134421db98f910e6e719f50f863a204df6768e3369","src/id.rs":"c736c0b3d35cf620e2c01322d57c4938b42828b39948ecad82d39fc39c1093c1","src/identity.rs":"c6a719389d71bb11c9ceaeadb0496f8b4c6ad24e35597e12b40980ad7ad72f10","src/init_tracker/buffer.rs":"61eb9cfaa312135b7a937ff6a3117f531b5b7323fae6553a41d6de9bc106d7e0","src/init_tracker/mod.rs":"a0f64730cc025113b656b4690f9dcb0ec18b8770bc7ef24c7b4ad8bebae03d24","src/init_tracker/texture.rs":"030fd594bf9948fad391390d85c5e1fec7eaf67b6e812c60f2dd59bc4fda8fd5","src/instance.rs":"c9b5b53a0aeac8e117d49a3a007fab001cd5737e29dd75388cdbfc24f3d8df08","src/lib.rs":"49174591f8116c3b8fadb185f89ce69ae931ee6e9f639d2558848db82ea1651f","src/pipeline.rs":"300f58afc16c454ce52aabff6debd7a7db85ed627b111a8801bcb201827f110c","src/pool.rs":"778ea1c23fcfaaa5001606e686f712f606826039d60dd5a3cd26e7de91ac057a","src/present.rs":"86b1e8bd7314f77f083be6d89a2f734e92f2ed11c86eb4c912c754fcdaa2e597","src/registry.rs":"dbc9310a24a843cf6b94a4bab78b0bb5f325e18c1f3c19c94d4f12b4f29e8598","src/resource.rs":"cd568c9d1abd4bf740cb86efae7862b5478518f3b1cdaf792ae05b3c0920c8e0","src/snatch.rs":"29a1135ee09c06883eac4df6f45b7220c2ba8f89f34232ea1d270d6e7b05c7a8","src/
storage.rs":"f0c41461b8f9cdc862dbd3de04c8e720ee416c7c57310696f6f4fd22183fcc85","src/track/buffer.rs":"65c27dfabe7a1c3e4ddbde7189e53b2e95f3f3663aa82b121801a2fd0dcbd304","src/track/metadata.rs":"ac82a9c69b0a141b5c3ca69b203c5aa2a17578b598cab3ae156b917cef734b97","src/track/mod.rs":"8f03955447544f3ebcb48547440a48d321ad1ff0e0c601a62623b5457763b8de","src/track/range.rs":"2a15794e79b0470d5ba6b3267173a42f34312878e1cb288f198d2854a7888e53","src/track/stateless.rs":"2da10160c46d07ad15986ba6f1356b7933806fc5c3fa5a9d8deea44d9a3c93a7","src/track/texture.rs":"15892e639f2ecbb13c8d34c29e3fd6ad719cb71e2d40c64910b552b8985ddab0","src/validation.rs":"613c58c3601f36d6aa5986cea01f30497c6bd4ceb990824904d101b2327941a9"},"package":null}
\ No newline at end of file diff --git a/third_party/rust/wgpu-core/Cargo.toml b/third_party/rust/wgpu-core/Cargo.toml new file mode 100644 index 0000000000..f9692cf607 --- /dev/null +++ b/third_party/rust/wgpu-core/Cargo.toml @@ -0,0 +1,133 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2021" +rust-version = "1.70" +name = "wgpu-core" +version = "0.19.0" +authors = ["gfx-rs developers"] +description = "WebGPU core logic on wgpu-hal" +homepage = "https://wgpu.rs/" +keywords = ["graphics"] +license = "MIT OR Apache-2.0" +repository = "https://github.com/gfx-rs/wgpu" + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = [ + "--cfg", + "docsrs", +] +targets = [ + "x86_64-unknown-linux-gnu", + "x86_64-apple-darwin", + "x86_64-pc-windows-msvc", + "wasm32-unknown-unknown", +] + +[lib] + +[dependencies] +arrayvec = "0.7" +bit-vec = "0.6" +bitflags = "2" +codespan-reporting = "0.11" +indexmap = "2" +log = "0.4" +once_cell = "1" +parking_lot = ">=0.11,<0.13" +rustc-hash = "1.1" +smallvec = "1" +thiserror = "1" + +[dependencies.bytemuck] +version = "1.14" +optional = true + +[dependencies.hal] +version = "0.19.0" +path = "../wgpu-hal" +default_features = false +package = "wgpu-hal" + +[dependencies.naga] +version = "0.19.0" +path = "../naga" +features = ["clone"] + +[dependencies.profiling] +version = "1" +default-features = false + +[dependencies.raw-window-handle] +version = "0.6" +optional = true + +[dependencies.ron] +version = "0.8" +optional = true + +[dependencies.serde] +version = "1" +features = 
["serde_derive"] +optional = true + +[dependencies.wgt] +version = "0.19.0" +path = "../wgpu-types" +package = "wgpu-types" + +[build-dependencies] +cfg_aliases = "0.1" + +[features] +api_log_info = [] +default = ["link"] +dx12 = ["hal/dx12"] +fragile-send-sync-non-atomic-wasm = [ + "hal/fragile-send-sync-non-atomic-wasm", + "wgt/fragile-send-sync-non-atomic-wasm", +] +gles = ["hal/gles"] +glsl = ["naga/glsl-in"] +link = ["hal/link"] +metal = ["hal/metal"] +renderdoc = ["hal/renderdoc"] +replay = [ + "serde", + "naga/deserialize", +] +resource_log_info = [] +serde = [ + "dep:serde", + "wgt/serde", + "arrayvec/serde", +] +spirv = [ + "naga/spv-in", + "dep:bytemuck", +] +strict_asserts = ["wgt/strict_asserts"] +trace = [ + "ron", + "serde", + "naga/serialize", +] +vulkan = ["hal/vulkan"] +wgsl = ["naga/wgsl-in"] + +[target."cfg(all(target_arch = \"wasm32\", not(target_os = \"emscripten\")))".dependencies.web-sys] +version = "0.3.67" +features = [ + "HtmlCanvasElement", + "OffscreenCanvas", +] diff --git a/third_party/rust/wgpu-core/LICENSE.APACHE b/third_party/rust/wgpu-core/LICENSE.APACHE new file mode 100644 index 0000000000..d9a10c0d8e --- /dev/null +++ b/third_party/rust/wgpu-core/LICENSE.APACHE @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS diff --git a/third_party/rust/wgpu-core/LICENSE.MIT b/third_party/rust/wgpu-core/LICENSE.MIT new file mode 100644 index 0000000000..4699691b8e --- /dev/null +++ b/third_party/rust/wgpu-core/LICENSE.MIT @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 The gfx-rs developers + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/third_party/rust/wgpu-core/build.rs b/third_party/rust/wgpu-core/build.rs new file mode 100644 index 0000000000..2f715fdb2a --- /dev/null +++ b/third_party/rust/wgpu-core/build.rs @@ -0,0 +1,13 @@ +fn main() { + cfg_aliases::cfg_aliases! 
{ + send_sync: { any( + not(target_arch = "wasm32"), + all(feature = "fragile-send-sync-non-atomic-wasm", not(target_feature = "atomics")) + ) }, + webgl: { all(target_arch = "wasm32", not(target_os = "emscripten"), gles) }, + dx12: { all(target_os = "windows", feature = "dx12") }, + gles: { all(feature = "gles") }, + metal: { all(any(target_os = "ios", target_os = "macos"), feature = "metal") }, + vulkan: { all(not(target_arch = "wasm32"), feature = "vulkan") } + } +} diff --git a/third_party/rust/wgpu-core/src/any_surface.rs b/third_party/rust/wgpu-core/src/any_surface.rs new file mode 100644 index 0000000000..94edfc4433 --- /dev/null +++ b/third_party/rust/wgpu-core/src/any_surface.rs @@ -0,0 +1,95 @@ +use wgt::Backend; + +/// The `AnySurface` type: a `Arc` of a `A::Surface` for any backend `A`. +use crate::hal_api::HalApi; + +use std::fmt; +use std::mem::ManuallyDrop; +use std::ptr::NonNull; + +struct AnySurfaceVtable { + // We oppurtunistically store the backend here, since we now it will be used + // with backend selection and it can be stored in static memory. + backend: Backend, + // Drop glue which knows how to drop the stored data. + drop: unsafe fn(*mut ()), +} + +/// An `A::Surface`, for any backend `A`. +/// +/// Any `AnySurface` is just like an `A::Surface`, except that the `A` type +/// parameter is erased. To access the `Surface`, you must downcast to a +/// particular backend with the \[`downcast_ref`\] or \[`take`\] methods. +pub struct AnySurface { + data: NonNull<()>, + vtable: &'static AnySurfaceVtable, +} + +impl AnySurface { + /// Construct an `AnySurface` that owns an `A::Surface`. 
+ pub fn new<A: HalApi>(surface: A::Surface) -> AnySurface { + unsafe fn drop_glue<A: HalApi>(ptr: *mut ()) { + unsafe { + _ = Box::from_raw(ptr.cast::<A::Surface>()); + } + } + + let data = NonNull::from(Box::leak(Box::new(surface))); + + AnySurface { + data: data.cast(), + vtable: &AnySurfaceVtable { + backend: A::VARIANT, + drop: drop_glue::<A>, + }, + } + } + + /// Get the backend this surface was created through. + pub fn backend(&self) -> Backend { + self.vtable.backend + } + + /// If `self` refers to an `A::Surface`, returns a reference to it. + pub fn downcast_ref<A: HalApi>(&self) -> Option<&A::Surface> { + if A::VARIANT != self.vtable.backend { + return None; + } + + // SAFETY: We just checked the instance above implicitly by the backend + // that it was statically constructed through. + Some(unsafe { &*self.data.as_ptr().cast::<A::Surface>() }) + } + + /// If `self` is an `Arc<A::Surface>`, returns that. + pub fn take<A: HalApi>(self) -> Option<A::Surface> { + if A::VARIANT != self.vtable.backend { + return None; + } + + // Disable drop glue, since we're returning the owned surface. The + // caller will be responsible for dropping it. + let this = ManuallyDrop::new(self); + + // SAFETY: We just checked the instance above implicitly by the backend + // that it was statically constructed through. 
+ Some(unsafe { *Box::from_raw(this.data.as_ptr().cast::<A::Surface>()) }) + } +} + +impl Drop for AnySurface { + fn drop(&mut self) { + unsafe { (self.vtable.drop)(self.data.as_ptr()) } + } +} + +impl fmt::Debug for AnySurface { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "AnySurface<{}>", self.vtable.backend) + } +} + +#[cfg(send_sync)] +unsafe impl Send for AnySurface {} +#[cfg(send_sync)] +unsafe impl Sync for AnySurface {} diff --git a/third_party/rust/wgpu-core/src/binding_model.rs b/third_party/rust/wgpu-core/src/binding_model.rs new file mode 100644 index 0000000000..d7b54ad5a5 --- /dev/null +++ b/third_party/rust/wgpu-core/src/binding_model.rs @@ -0,0 +1,953 @@ +#[cfg(feature = "trace")] +use crate::device::trace; +use crate::{ + device::{ + bgl, Device, DeviceError, MissingDownlevelFlags, MissingFeatures, SHADER_STAGE_COUNT, + }, + error::{ErrorFormatter, PrettyError}, + hal_api::HalApi, + id::{BindGroupLayoutId, BufferId, SamplerId, TextureId, TextureViewId}, + init_tracker::{BufferInitTrackerAction, TextureInitTrackerAction}, + resource::{Resource, ResourceInfo, ResourceType}, + resource_log, + snatch::{SnatchGuard, Snatchable}, + track::{BindGroupStates, UsageConflict}, + validation::{MissingBufferUsageError, MissingTextureUsageError}, + Label, +}; + +use arrayvec::ArrayVec; + +#[cfg(feature = "serde")] +use serde::Deserialize; +#[cfg(feature = "serde")] +use serde::Serialize; + +use std::{borrow::Cow, ops::Range, sync::Arc}; + +use thiserror::Error; + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum BindGroupLayoutEntryError { + #[error("Cube dimension is not expected for texture storage")] + StorageTextureCube, + #[error("Read-write and read-only storage textures are not allowed by webgpu, they require the native only feature TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES")] + StorageTextureReadWrite, + #[error("Arrays of bindings unsupported for this type of binding")] + ArrayUnsupported, + #[error("Multisampled 
binding with sample type `TextureSampleType::Float` must have filterable set to false.")] + SampleTypeFloatFilterableBindingMultisampled, + #[error(transparent)] + MissingFeatures(#[from] MissingFeatures), + #[error(transparent)] + MissingDownlevelFlags(#[from] MissingDownlevelFlags), +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum CreateBindGroupLayoutError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("Conflicting binding at index {0}")] + ConflictBinding(u32), + #[error("Binding {binding} entry is invalid")] + Entry { + binding: u32, + #[source] + error: BindGroupLayoutEntryError, + }, + #[error(transparent)] + TooManyBindings(BindingTypeMaxCountError), + #[error("Binding index {binding} is greater than the maximum index {maximum}")] + InvalidBindingIndex { binding: u32, maximum: u32 }, + #[error("Invalid visibility {0:?}")] + InvalidVisibility(wgt::ShaderStages), +} + +//TODO: refactor this to move out `enum BindingError`. + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum CreateBindGroupError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("Bind group layout is invalid")] + InvalidLayout, + #[error("Buffer {0:?} is invalid or destroyed")] + InvalidBuffer(BufferId), + #[error("Texture view {0:?} is invalid")] + InvalidTextureView(TextureViewId), + #[error("Texture {0:?} is invalid")] + InvalidTexture(TextureId), + #[error("Sampler {0:?} is invalid")] + InvalidSampler(SamplerId), + #[error( + "Binding count declared with at most {expected} items, but {actual} items were provided" + )] + BindingArrayPartialLengthMismatch { actual: usize, expected: usize }, + #[error( + "Binding count declared with exactly {expected} items, but {actual} items were provided" + )] + BindingArrayLengthMismatch { actual: usize, expected: usize }, + #[error("Array binding provided zero elements")] + BindingArrayZeroLength, + #[error("Bound buffer range {range:?} does not fit in buffer of size {size}")] + 
BindingRangeTooLarge { + buffer: BufferId, + range: Range<wgt::BufferAddress>, + size: u64, + }, + #[error("Buffer binding size {actual} is less than minimum {min}")] + BindingSizeTooSmall { + buffer: BufferId, + actual: u64, + min: u64, + }, + #[error("Buffer binding size is zero")] + BindingZeroSize(BufferId), + #[error("Number of bindings in bind group descriptor ({actual}) does not match the number of bindings defined in the bind group layout ({expected})")] + BindingsNumMismatch { actual: usize, expected: usize }, + #[error("Binding {0} is used at least twice in the descriptor")] + DuplicateBinding(u32), + #[error("Unable to find a corresponding declaration for the given binding {0}")] + MissingBindingDeclaration(u32), + #[error(transparent)] + MissingBufferUsage(#[from] MissingBufferUsageError), + #[error(transparent)] + MissingTextureUsage(#[from] MissingTextureUsageError), + #[error("Binding declared as a single item, but bind group is using it as an array")] + SingleBindingExpected, + #[error("Buffer offset {0} does not respect device's requested `{1}` limit {2}")] + UnalignedBufferOffset(wgt::BufferAddress, &'static str, u32), + #[error( + "Buffer binding {binding} range {given} exceeds `max_*_buffer_binding_size` limit {limit}" + )] + BufferRangeTooLarge { + binding: u32, + given: u32, + limit: u32, + }, + #[error("Binding {binding} has a different type ({actual:?}) than the one in the layout ({expected:?})")] + WrongBindingType { + // Index of the binding + binding: u32, + // The type given to the function + actual: wgt::BindingType, + // Human-readable description of expected types + expected: &'static str, + }, + #[error("Texture binding {binding} expects multisampled = {layout_multisampled}, but given a view with samples = {view_samples}")] + InvalidTextureMultisample { + binding: u32, + layout_multisampled: bool, + view_samples: u32, + }, + #[error("Texture binding {binding} expects sample type = {layout_sample_type:?}, but given a view with format 
= {view_format:?}")] + InvalidTextureSampleType { + binding: u32, + layout_sample_type: wgt::TextureSampleType, + view_format: wgt::TextureFormat, + }, + #[error("Texture binding {binding} expects dimension = {layout_dimension:?}, but given a view with dimension = {view_dimension:?}")] + InvalidTextureDimension { + binding: u32, + layout_dimension: wgt::TextureViewDimension, + view_dimension: wgt::TextureViewDimension, + }, + #[error("Storage texture binding {binding} expects format = {layout_format:?}, but given a view with format = {view_format:?}")] + InvalidStorageTextureFormat { + binding: u32, + layout_format: wgt::TextureFormat, + view_format: wgt::TextureFormat, + }, + #[error("Storage texture bindings must have a single mip level, but given a view with mip_level_count = {mip_level_count:?} at binding {binding}")] + InvalidStorageTextureMipLevelCount { binding: u32, mip_level_count: u32 }, + #[error("Sampler binding {binding} expects comparison = {layout_cmp}, but given a sampler with comparison = {sampler_cmp}")] + WrongSamplerComparison { + binding: u32, + layout_cmp: bool, + sampler_cmp: bool, + }, + #[error("Sampler binding {binding} expects filtering = {layout_flt}, but given a sampler with filtering = {sampler_flt}")] + WrongSamplerFiltering { + binding: u32, + layout_flt: bool, + sampler_flt: bool, + }, + #[error("Bound texture views can not have both depth and stencil aspects enabled")] + DepthStencilAspect, + #[error("The adapter does not support read access for storages texture of format {0:?}")] + StorageReadNotSupported(wgt::TextureFormat), + #[error(transparent)] + ResourceUsageConflict(#[from] UsageConflict), +} + +impl PrettyError for CreateBindGroupError { + fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { + fmt.error(self); + match *self { + Self::BindingZeroSize(id) => { + fmt.buffer_label(&id); + } + Self::BindingRangeTooLarge { buffer, .. } => { + fmt.buffer_label(&buffer); + } + Self::BindingSizeTooSmall { buffer, .. 
} => { + fmt.buffer_label(&buffer); + } + Self::InvalidBuffer(id) => { + fmt.buffer_label(&id); + } + Self::InvalidTextureView(id) => { + fmt.texture_view_label(&id); + } + Self::InvalidSampler(id) => { + fmt.sampler_label(&id); + } + _ => {} + }; + } +} + +#[derive(Clone, Debug, Error)] +pub enum BindingZone { + #[error("Stage {0:?}")] + Stage(wgt::ShaderStages), + #[error("Whole pipeline")] + Pipeline, +} + +#[derive(Clone, Debug, Error)] +#[error("Too many bindings of type {kind:?} in {zone}, limit is {limit}, count was {count}")] +pub struct BindingTypeMaxCountError { + pub kind: BindingTypeMaxCountErrorKind, + pub zone: BindingZone, + pub limit: u32, + pub count: u32, +} + +#[derive(Clone, Debug)] +pub enum BindingTypeMaxCountErrorKind { + DynamicUniformBuffers, + DynamicStorageBuffers, + SampledTextures, + Samplers, + StorageBuffers, + StorageTextures, + UniformBuffers, +} + +#[derive(Debug, Default)] +pub(crate) struct PerStageBindingTypeCounter { + vertex: u32, + fragment: u32, + compute: u32, +} + +impl PerStageBindingTypeCounter { + pub(crate) fn add(&mut self, stage: wgt::ShaderStages, count: u32) { + if stage.contains(wgt::ShaderStages::VERTEX) { + self.vertex += count; + } + if stage.contains(wgt::ShaderStages::FRAGMENT) { + self.fragment += count; + } + if stage.contains(wgt::ShaderStages::COMPUTE) { + self.compute += count; + } + } + + pub(crate) fn max(&self) -> (BindingZone, u32) { + let max_value = self.vertex.max(self.fragment.max(self.compute)); + let mut stage = wgt::ShaderStages::NONE; + if max_value == self.vertex { + stage |= wgt::ShaderStages::VERTEX + } + if max_value == self.fragment { + stage |= wgt::ShaderStages::FRAGMENT + } + if max_value == self.compute { + stage |= wgt::ShaderStages::COMPUTE + } + (BindingZone::Stage(stage), max_value) + } + + pub(crate) fn merge(&mut self, other: &Self) { + self.vertex = self.vertex.max(other.vertex); + self.fragment = self.fragment.max(other.fragment); + self.compute = 
self.compute.max(other.compute); + } + + pub(crate) fn validate( + &self, + limit: u32, + kind: BindingTypeMaxCountErrorKind, + ) -> Result<(), BindingTypeMaxCountError> { + let (zone, count) = self.max(); + if limit < count { + Err(BindingTypeMaxCountError { + kind, + zone, + limit, + count, + }) + } else { + Ok(()) + } + } +} + +#[derive(Debug, Default)] +pub(crate) struct BindingTypeMaxCountValidator { + dynamic_uniform_buffers: u32, + dynamic_storage_buffers: u32, + sampled_textures: PerStageBindingTypeCounter, + samplers: PerStageBindingTypeCounter, + storage_buffers: PerStageBindingTypeCounter, + storage_textures: PerStageBindingTypeCounter, + uniform_buffers: PerStageBindingTypeCounter, +} + +impl BindingTypeMaxCountValidator { + pub(crate) fn add_binding(&mut self, binding: &wgt::BindGroupLayoutEntry) { + let count = binding.count.map_or(1, |count| count.get()); + match binding.ty { + wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Uniform, + has_dynamic_offset, + .. + } => { + self.uniform_buffers.add(binding.visibility, count); + if has_dynamic_offset { + self.dynamic_uniform_buffers += count; + } + } + wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Storage { .. }, + has_dynamic_offset, + .. + } => { + self.storage_buffers.add(binding.visibility, count); + if has_dynamic_offset { + self.dynamic_storage_buffers += count; + } + } + wgt::BindingType::Sampler { .. } => { + self.samplers.add(binding.visibility, count); + } + wgt::BindingType::Texture { .. } => { + self.sampled_textures.add(binding.visibility, count); + } + wgt::BindingType::StorageTexture { .. 
} => { + self.storage_textures.add(binding.visibility, count); + } + wgt::BindingType::AccelerationStructure => todo!(), + } + } + + pub(crate) fn merge(&mut self, other: &Self) { + self.dynamic_uniform_buffers += other.dynamic_uniform_buffers; + self.dynamic_storage_buffers += other.dynamic_storage_buffers; + self.sampled_textures.merge(&other.sampled_textures); + self.samplers.merge(&other.samplers); + self.storage_buffers.merge(&other.storage_buffers); + self.storage_textures.merge(&other.storage_textures); + self.uniform_buffers.merge(&other.uniform_buffers); + } + + pub(crate) fn validate(&self, limits: &wgt::Limits) -> Result<(), BindingTypeMaxCountError> { + if limits.max_dynamic_uniform_buffers_per_pipeline_layout < self.dynamic_uniform_buffers { + return Err(BindingTypeMaxCountError { + kind: BindingTypeMaxCountErrorKind::DynamicUniformBuffers, + zone: BindingZone::Pipeline, + limit: limits.max_dynamic_uniform_buffers_per_pipeline_layout, + count: self.dynamic_uniform_buffers, + }); + } + if limits.max_dynamic_storage_buffers_per_pipeline_layout < self.dynamic_storage_buffers { + return Err(BindingTypeMaxCountError { + kind: BindingTypeMaxCountErrorKind::DynamicStorageBuffers, + zone: BindingZone::Pipeline, + limit: limits.max_dynamic_storage_buffers_per_pipeline_layout, + count: self.dynamic_storage_buffers, + }); + } + self.sampled_textures.validate( + limits.max_sampled_textures_per_shader_stage, + BindingTypeMaxCountErrorKind::SampledTextures, + )?; + self.storage_buffers.validate( + limits.max_storage_buffers_per_shader_stage, + BindingTypeMaxCountErrorKind::StorageBuffers, + )?; + self.samplers.validate( + limits.max_samplers_per_shader_stage, + BindingTypeMaxCountErrorKind::Samplers, + )?; + self.storage_buffers.validate( + limits.max_storage_buffers_per_shader_stage, + BindingTypeMaxCountErrorKind::StorageBuffers, + )?; + self.storage_textures.validate( + limits.max_storage_textures_per_shader_stage, + BindingTypeMaxCountErrorKind::StorageTextures, 
+ )?; + self.uniform_buffers.validate( + limits.max_uniform_buffers_per_shader_stage, + BindingTypeMaxCountErrorKind::UniformBuffers, + )?; + Ok(()) + } +} + +/// Bindable resource and the slot to bind it to. +#[derive(Clone, Debug)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct BindGroupEntry<'a> { + /// Slot for which binding provides resource. Corresponds to an entry of the same + /// binding index in the [`BindGroupLayoutDescriptor`]. + pub binding: u32, + /// Resource to attach to the binding + pub resource: BindingResource<'a>, +} + +/// Describes a group of bindings and the resources to be bound. +#[derive(Clone, Debug)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct BindGroupDescriptor<'a> { + /// Debug label of the bind group. + /// + /// This will show up in graphics debuggers for easy identification. + pub label: Label<'a>, + /// The [`BindGroupLayout`] that corresponds to this bind group. + pub layout: BindGroupLayoutId, + /// The resources to bind to this bind group. + pub entries: Cow<'a, [BindGroupEntry<'a>]>, +} + +/// Describes a [`BindGroupLayout`]. +#[derive(Clone, Debug)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct BindGroupLayoutDescriptor<'a> { + /// Debug label of the bind group layout. + /// + /// This will show up in graphics debuggers for easy identification. + pub label: Label<'a>, + /// Array of entries in this BindGroupLayout + pub entries: Cow<'a, [wgt::BindGroupLayoutEntry]>, +} + +pub type BindGroupLayouts<A> = crate::storage::Storage<BindGroupLayout<A>>; + +/// Bind group layout. +#[derive(Debug)] +pub struct BindGroupLayout<A: HalApi> { + pub(crate) raw: Option<A::BindGroupLayout>, + pub(crate) device: Arc<Device<A>>, + pub(crate) entries: bgl::EntryMap, + /// It is very important that we know if the bind group comes from the BGL pool. + /// + /// If it does, then we need to remove it from the pool when we drop it. 
+ /// + /// We cannot unconditionally remove from the pool, as BGLs that don't come from the pool + /// (derived BGLs) must not be removed. + pub(crate) origin: bgl::Origin, + #[allow(unused)] + pub(crate) binding_count_validator: BindingTypeMaxCountValidator, + pub(crate) info: ResourceInfo<BindGroupLayout<A>>, + pub(crate) label: String, +} + +impl<A: HalApi> Drop for BindGroupLayout<A> { + fn drop(&mut self) { + if matches!(self.origin, bgl::Origin::Pool) { + self.device.bgl_pool.remove(&self.entries); + } + if let Some(raw) = self.raw.take() { + #[cfg(feature = "trace")] + if let Some(t) = self.device.trace.lock().as_mut() { + t.add(trace::Action::DestroyBindGroupLayout(self.info.id())); + } + + resource_log!("Destroy raw BindGroupLayout {:?}", self.info.label()); + unsafe { + use hal::Device; + self.device.raw().destroy_bind_group_layout(raw); + } + } + } +} + +impl<A: HalApi> Resource for BindGroupLayout<A> { + const TYPE: ResourceType = "BindGroupLayout"; + + type Marker = crate::id::markers::BindGroupLayout; + + fn as_info(&self) -> &ResourceInfo<Self> { + &self.info + } + + fn as_info_mut(&mut self) -> &mut ResourceInfo<Self> { + &mut self.info + } + + fn label(&self) -> String { + self.label.clone() + } +} +impl<A: HalApi> BindGroupLayout<A> { + pub(crate) fn raw(&self) -> &A::BindGroupLayout { + self.raw.as_ref().unwrap() + } +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum CreatePipelineLayoutError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("Bind group layout {0:?} is invalid")] + InvalidBindGroupLayout(BindGroupLayoutId), + #[error( + "Push constant at index {index} has range bound {bound} not aligned to {}", + wgt::PUSH_CONSTANT_ALIGNMENT + )] + MisalignedPushConstantRange { index: usize, bound: u32 }, + #[error(transparent)] + MissingFeatures(#[from] MissingFeatures), + #[error("Push constant range (index {index}) provides for stage(s) {provided:?} but there exists another range that provides stage(s) 
{intersected:?}. Each stage may only be provided by one range")] + MoreThanOnePushConstantRangePerStage { + index: usize, + provided: wgt::ShaderStages, + intersected: wgt::ShaderStages, + }, + #[error("Push constant at index {index} has range {}..{} which exceeds device push constant size limit 0..{max}", range.start, range.end)] + PushConstantRangeTooLarge { + index: usize, + range: Range<u32>, + max: u32, + }, + #[error(transparent)] + TooManyBindings(BindingTypeMaxCountError), + #[error("Bind group layout count {actual} exceeds device bind group limit {max}")] + TooManyGroups { actual: usize, max: usize }, +} + +impl PrettyError for CreatePipelineLayoutError { + fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { + fmt.error(self); + if let Self::InvalidBindGroupLayout(id) = *self { + fmt.bind_group_layout_label(&id); + }; + } +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum PushConstantUploadError { + #[error("Provided push constant with indices {offset}..{end_offset} overruns matching push constant range at index {idx}, with stage(s) {:?} and indices {:?}", range.stages, range.range)] + TooLarge { + offset: u32, + end_offset: u32, + idx: usize, + range: wgt::PushConstantRange, + }, + #[error("Provided push constant is for stage(s) {actual:?}, stage with a partial match found at index {idx} with stage(s) {matched:?}, however push constants must be complete matches")] + PartialRangeMatch { + actual: wgt::ShaderStages, + idx: usize, + matched: wgt::ShaderStages, + }, + #[error("Provided push constant is for stage(s) {actual:?}, but intersects a push constant range (at index {idx}) with stage(s) {missing:?}. 
Push constants must provide the stages for all ranges they intersect")] + MissingStages { + actual: wgt::ShaderStages, + idx: usize, + missing: wgt::ShaderStages, + }, + #[error("Provided push constant is for stage(s) {actual:?}, however the pipeline layout has no push constant range for the stage(s) {unmatched:?}")] + UnmatchedStages { + actual: wgt::ShaderStages, + unmatched: wgt::ShaderStages, + }, + #[error("Provided push constant offset {0} does not respect `PUSH_CONSTANT_ALIGNMENT`")] + Unaligned(u32), +} + +/// Describes a pipeline layout. +/// +/// A `PipelineLayoutDescriptor` can be used to create a pipeline layout. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct PipelineLayoutDescriptor<'a> { + /// Debug label of the pipeline layout. + /// + /// This will show up in graphics debuggers for easy identification. + pub label: Label<'a>, + /// Bind groups that this pipeline uses. The first entry will provide all the bindings for + /// "set = 0", second entry will provide all the bindings for "set = 1" etc. + pub bind_group_layouts: Cow<'a, [BindGroupLayoutId]>, + /// Set of push constant ranges this pipeline uses. Each shader stage that + /// uses push constants must define the range in push constant memory that + /// corresponds to its single `layout(push_constant)` uniform block. + /// + /// If this array is non-empty, the + /// [`Features::PUSH_CONSTANTS`](wgt::Features::PUSH_CONSTANTS) feature must + /// be enabled. 
+ pub push_constant_ranges: Cow<'a, [wgt::PushConstantRange]>, +} + +#[derive(Debug)] +pub struct PipelineLayout<A: HalApi> { + pub(crate) raw: Option<A::PipelineLayout>, + pub(crate) device: Arc<Device<A>>, + pub(crate) info: ResourceInfo<PipelineLayout<A>>, + pub(crate) bind_group_layouts: ArrayVec<Arc<BindGroupLayout<A>>, { hal::MAX_BIND_GROUPS }>, + pub(crate) push_constant_ranges: ArrayVec<wgt::PushConstantRange, { SHADER_STAGE_COUNT }>, +} + +impl<A: HalApi> Drop for PipelineLayout<A> { + fn drop(&mut self) { + if let Some(raw) = self.raw.take() { + resource_log!("Destroy raw PipelineLayout {:?}", self.info.label()); + + #[cfg(feature = "trace")] + if let Some(t) = self.device.trace.lock().as_mut() { + t.add(trace::Action::DestroyPipelineLayout(self.info.id())); + } + + unsafe { + use hal::Device; + self.device.raw().destroy_pipeline_layout(raw); + } + } + } +} + +impl<A: HalApi> PipelineLayout<A> { + pub(crate) fn raw(&self) -> &A::PipelineLayout { + self.raw.as_ref().unwrap() + } + + pub(crate) fn get_binding_maps(&self) -> ArrayVec<&bgl::EntryMap, { hal::MAX_BIND_GROUPS }> { + self.bind_group_layouts + .iter() + .map(|bgl| &bgl.entries) + .collect() + } + + /// Validate push constants match up with expected ranges. + pub(crate) fn validate_push_constant_ranges( + &self, + stages: wgt::ShaderStages, + offset: u32, + end_offset: u32, + ) -> Result<(), PushConstantUploadError> { + // Don't need to validate size against the push constant size limit here, + // as push constant ranges are already validated to be within bounds, + // and we validate that they are within the ranges. + + if offset % wgt::PUSH_CONSTANT_ALIGNMENT != 0 { + return Err(PushConstantUploadError::Unaligned(offset)); + } + + // Push constant validation looks very complicated on the surface, but + // the problem can be range-reduced pretty well. + // + // Push constants require (summarized from the vulkan spec): + // 1. 
For each byte in the range and for each shader stage in stageFlags, + // there must be a push constant range in the layout that includes that + // byte and that stage. + // 2. For each byte in the range and for each push constant range that overlaps that byte, + // `stage` must include all stages in that push constant range’s `stage`. + // + // However there are some additional constraints that help us: + // 3. All push constant ranges are the only range that can access that stage. + // i.e. if one range has VERTEX, no other range has VERTEX + // + // Therefore we can simplify the checks in the following ways: + // - Because 3 guarantees that the push constant range has a unique stage, + // when we check for 1, we can simply check that our entire updated range + // is within a push constant range. i.e. our range for a specific stage cannot + // intersect more than one push constant range. + let mut used_stages = wgt::ShaderStages::NONE; + for (idx, range) in self.push_constant_ranges.iter().enumerate() { + // contains not intersects due to 2 + if stages.contains(range.stages) { + if !(range.range.start <= offset && end_offset <= range.range.end) { + return Err(PushConstantUploadError::TooLarge { + offset, + end_offset, + idx, + range: range.clone(), + }); + } + used_stages |= range.stages; + } else if stages.intersects(range.stages) { + // Will be caught by used stages check below, but we can do this because of 1 + // and is more helpful to the user. 
+ return Err(PushConstantUploadError::PartialRangeMatch { + actual: stages, + idx, + matched: range.stages, + }); + } + + // The push constant range intersects range we are uploading + if offset < range.range.end && range.range.start < end_offset { + // But requires stages we don't provide + if !stages.contains(range.stages) { + return Err(PushConstantUploadError::MissingStages { + actual: stages, + idx, + missing: stages, + }); + } + } + } + if used_stages != stages { + return Err(PushConstantUploadError::UnmatchedStages { + actual: stages, + unmatched: stages - used_stages, + }); + } + Ok(()) + } +} + +impl<A: HalApi> Resource for PipelineLayout<A> { + const TYPE: ResourceType = "PipelineLayout"; + + type Marker = crate::id::markers::PipelineLayout; + + fn as_info(&self) -> &ResourceInfo<Self> { + &self.info + } + + fn as_info_mut(&mut self) -> &mut ResourceInfo<Self> { + &mut self.info + } +} + +#[repr(C)] +#[derive(Clone, Debug, Hash, Eq, PartialEq)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct BufferBinding { + pub buffer_id: BufferId, + pub offset: wgt::BufferAddress, + pub size: Option<wgt::BufferSize>, +} + +// Note: Duplicated in `wgpu-rs` as `BindingResource` +// They're different enough that it doesn't make sense to share a common type +#[derive(Debug, Clone)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum BindingResource<'a> { + Buffer(BufferBinding), + BufferArray(Cow<'a, [BufferBinding]>), + Sampler(SamplerId), + SamplerArray(Cow<'a, [SamplerId]>), + TextureView(TextureViewId), + TextureViewArray(Cow<'a, [TextureViewId]>), +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum BindError { + #[error( + "Bind group {group} expects {expected} dynamic offset{s0}. 
However {actual} dynamic offset{s1} were provided.", + s0 = if *.expected >= 2 { "s" } else { "" }, + s1 = if *.actual >= 2 { "s" } else { "" }, + )] + MismatchedDynamicOffsetCount { + group: u32, + actual: usize, + expected: usize, + }, + #[error( + "Dynamic binding index {idx} (targeting bind group {group}, binding {binding}) with value {offset}, does not respect device's requested `{limit_name}` limit: {alignment}" + )] + UnalignedDynamicBinding { + idx: usize, + group: u32, + binding: u32, + offset: u32, + alignment: u32, + limit_name: &'static str, + }, + #[error( + "Dynamic binding offset index {idx} with offset {offset} would overrun the buffer bound to bind group {group} -> binding {binding}. \ + Buffer size is {buffer_size} bytes, the binding binds bytes {binding_range:?}, meaning the maximum the binding can be offset is {maximum_dynamic_offset} bytes", + )] + DynamicBindingOutOfBounds { + idx: usize, + group: u32, + binding: u32, + offset: u32, + buffer_size: wgt::BufferAddress, + binding_range: Range<wgt::BufferAddress>, + maximum_dynamic_offset: wgt::BufferAddress, + }, +} + +#[derive(Debug)] +pub struct BindGroupDynamicBindingData { + /// The index of the binding. + /// + /// Used for more descriptive errors. + pub(crate) binding_idx: u32, + /// The size of the buffer. + /// + /// Used for more descriptive errors. + pub(crate) buffer_size: wgt::BufferAddress, + /// The range that the binding covers. + /// + /// Used for more descriptive errors. + pub(crate) binding_range: Range<wgt::BufferAddress>, + /// The maximum value the dynamic offset can have before running off the end of the buffer. + pub(crate) maximum_dynamic_offset: wgt::BufferAddress, + /// The binding type. 
+ pub(crate) binding_type: wgt::BufferBindingType, +} + +pub(crate) fn buffer_binding_type_alignment( + limits: &wgt::Limits, + binding_type: wgt::BufferBindingType, +) -> (u32, &'static str) { + match binding_type { + wgt::BufferBindingType::Uniform => ( + limits.min_uniform_buffer_offset_alignment, + "min_uniform_buffer_offset_alignment", + ), + wgt::BufferBindingType::Storage { .. } => ( + limits.min_storage_buffer_offset_alignment, + "min_storage_buffer_offset_alignment", + ), + } +} + +#[derive(Debug)] +pub struct BindGroup<A: HalApi> { + pub(crate) raw: Snatchable<A::BindGroup>, + pub(crate) device: Arc<Device<A>>, + pub(crate) layout: Arc<BindGroupLayout<A>>, + pub(crate) info: ResourceInfo<BindGroup<A>>, + pub(crate) used: BindGroupStates<A>, + pub(crate) used_buffer_ranges: Vec<BufferInitTrackerAction<A>>, + pub(crate) used_texture_ranges: Vec<TextureInitTrackerAction<A>>, + pub(crate) dynamic_binding_info: Vec<BindGroupDynamicBindingData>, + /// Actual binding sizes for buffers that don't have `min_binding_size` + /// specified in BGL. Listed in the order of iteration of `BGL.entries`. + pub(crate) late_buffer_binding_sizes: Vec<wgt::BufferSize>, +} + +impl<A: HalApi> Drop for BindGroup<A> { + fn drop(&mut self) { + if let Some(raw) = self.raw.take() { + resource_log!("Destroy raw BindGroup {:?}", self.info.label()); + + #[cfg(feature = "trace")] + if let Some(t) = self.device.trace.lock().as_mut() { + t.add(trace::Action::DestroyBindGroup(self.info.id())); + } + + unsafe { + use hal::Device; + self.device.raw().destroy_bind_group(raw); + } + } + } +} + +impl<A: HalApi> BindGroup<A> { + pub(crate) fn raw(&self, guard: &SnatchGuard) -> Option<&A::BindGroup> { + // Clippy insist on writing it this way. The idea is to return None + // if any of the raw buffer is not valid anymore. 
+ for buffer in &self.used_buffer_ranges { + let _ = buffer.buffer.raw(guard)?; + } + for texture in &self.used_texture_ranges { + let _ = texture.texture.raw(guard)?; + } + self.raw.get(guard) + } + pub(crate) fn validate_dynamic_bindings( + &self, + bind_group_index: u32, + offsets: &[wgt::DynamicOffset], + limits: &wgt::Limits, + ) -> Result<(), BindError> { + if self.dynamic_binding_info.len() != offsets.len() { + return Err(BindError::MismatchedDynamicOffsetCount { + group: bind_group_index, + expected: self.dynamic_binding_info.len(), + actual: offsets.len(), + }); + } + + for (idx, (info, &offset)) in self + .dynamic_binding_info + .iter() + .zip(offsets.iter()) + .enumerate() + { + let (alignment, limit_name) = buffer_binding_type_alignment(limits, info.binding_type); + if offset as wgt::BufferAddress % alignment as u64 != 0 { + return Err(BindError::UnalignedDynamicBinding { + group: bind_group_index, + binding: info.binding_idx, + idx, + offset, + alignment, + limit_name, + }); + } + + if offset as wgt::BufferAddress > info.maximum_dynamic_offset { + return Err(BindError::DynamicBindingOutOfBounds { + group: bind_group_index, + binding: info.binding_idx, + idx, + offset, + buffer_size: info.buffer_size, + binding_range: info.binding_range.clone(), + maximum_dynamic_offset: info.maximum_dynamic_offset, + }); + } + } + + Ok(()) + } +} + +impl<A: HalApi> Resource for BindGroup<A> { + const TYPE: ResourceType = "BindGroup"; + + type Marker = crate::id::markers::BindGroup; + + fn as_info(&self) -> &ResourceInfo<Self> { + &self.info + } + + fn as_info_mut(&mut self) -> &mut ResourceInfo<Self> { + &mut self.info + } +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum GetBindGroupLayoutError { + #[error("Pipeline is invalid")] + InvalidPipeline, + #[error("Invalid group index {0}")] + InvalidGroupIndex(u32), +} + +#[derive(Clone, Debug, Error, Eq, PartialEq)] +#[error("Buffer is bound with size {bound_size} where the shader expects {shader_size} in 
group[{group_index}] compact index {compact_index}")] +pub struct LateMinBufferBindingSizeMismatch { + pub group_index: u32, + pub compact_index: usize, + pub shader_size: wgt::BufferAddress, + pub bound_size: wgt::BufferAddress, +} diff --git a/third_party/rust/wgpu-core/src/command/bind.rs b/third_party/rust/wgpu-core/src/command/bind.rs new file mode 100644 index 0000000000..7b2ac54552 --- /dev/null +++ b/third_party/rust/wgpu-core/src/command/bind.rs @@ -0,0 +1,451 @@ +use std::sync::Arc; + +use crate::{ + binding_model::{BindGroup, LateMinBufferBindingSizeMismatch, PipelineLayout}, + device::SHADER_STAGE_COUNT, + hal_api::HalApi, + id::BindGroupId, + pipeline::LateSizedBufferGroup, + resource::Resource, +}; + +use arrayvec::ArrayVec; + +type BindGroupMask = u8; + +mod compat { + use arrayvec::ArrayVec; + + use crate::{binding_model::BindGroupLayout, device::bgl, hal_api::HalApi, resource::Resource}; + use std::{ops::Range, sync::Arc}; + + #[derive(Debug, Clone)] + struct Entry<A: HalApi> { + assigned: Option<Arc<BindGroupLayout<A>>>, + expected: Option<Arc<BindGroupLayout<A>>>, + } + + impl<A: HalApi> Entry<A> { + fn empty() -> Self { + Self { + assigned: None, + expected: None, + } + } + fn is_active(&self) -> bool { + self.assigned.is_some() && self.expected.is_some() + } + + fn is_valid(&self) -> bool { + if self.expected.is_none() { + return true; + } + if let Some(expected_bgl) = self.expected.as_ref() { + if let Some(assigned_bgl) = self.assigned.as_ref() { + if expected_bgl.is_equal(assigned_bgl) { + return true; + } + } + } + false + } + + fn is_incompatible(&self) -> bool { + self.expected.is_none() || !self.is_valid() + } + + // Describe how bind group layouts are incompatible, for validation + // error message. 
+ fn bgl_diff(&self) -> Vec<String> { + let mut diff = Vec::new(); + + if let Some(expected_bgl) = self.expected.as_ref() { + let expected_bgl_type = match expected_bgl.origin { + bgl::Origin::Derived => "implicit", + bgl::Origin::Pool => "explicit", + }; + let expected_label = expected_bgl.label(); + diff.push(format!( + "Should be compatible an with an {expected_bgl_type} bind group layout {}", + if expected_label.is_empty() { + "without label".to_string() + } else { + format!("with label = `{}`", expected_label) + } + )); + if let Some(assigned_bgl) = self.assigned.as_ref() { + let assigned_bgl_type = match assigned_bgl.origin { + bgl::Origin::Derived => "implicit", + bgl::Origin::Pool => "explicit", + }; + let assigned_label = assigned_bgl.label(); + diff.push(format!( + "Assigned {assigned_bgl_type} bind group layout {}", + if assigned_label.is_empty() { + "without label".to_string() + } else { + format!("with label = `{}`", assigned_label) + } + )); + for (id, e_entry) in expected_bgl.entries.iter() { + if let Some(a_entry) = assigned_bgl.entries.get(*id) { + if a_entry.binding != e_entry.binding { + diff.push(format!( + "Entry {id} binding expected {}, got {}", + e_entry.binding, a_entry.binding + )); + } + if a_entry.count != e_entry.count { + diff.push(format!( + "Entry {id} count expected {:?}, got {:?}", + e_entry.count, a_entry.count + )); + } + if a_entry.ty != e_entry.ty { + diff.push(format!( + "Entry {id} type expected {:?}, got {:?}", + e_entry.ty, a_entry.ty + )); + } + if a_entry.visibility != e_entry.visibility { + diff.push(format!( + "Entry {id} visibility expected {:?}, got {:?}", + e_entry.visibility, a_entry.visibility + )); + } + } else { + diff.push(format!( + "Entry {id} not found in assigned bind group layout" + )) + } + } + + assigned_bgl.entries.iter().for_each(|(id, _e_entry)| { + if !expected_bgl.entries.contains_key(*id) { + diff.push(format!( + "Entry {id} not found in expected bind group layout" + )) + } + }); + + if 
expected_bgl.origin != assigned_bgl.origin { + diff.push(format!("Expected {expected_bgl_type} bind group layout, got {assigned_bgl_type}")) + } + } else { + diff.push("Assigned bind group layout not found (internal error)".to_owned()); + } + } else { + diff.push("Expected bind group layout not found (internal error)".to_owned()); + } + + diff + } + } + + #[derive(Debug, Default)] + pub(crate) struct BoundBindGroupLayouts<A: HalApi> { + entries: ArrayVec<Entry<A>, { hal::MAX_BIND_GROUPS }>, + } + + impl<A: HalApi> BoundBindGroupLayouts<A> { + pub fn new() -> Self { + Self { + entries: (0..hal::MAX_BIND_GROUPS).map(|_| Entry::empty()).collect(), + } + } + fn make_range(&self, start_index: usize) -> Range<usize> { + // find first incompatible entry + let end = self + .entries + .iter() + .position(|e| e.is_incompatible()) + .unwrap_or(self.entries.len()); + start_index..end.max(start_index) + } + + pub fn update_expectations( + &mut self, + expectations: &[Arc<BindGroupLayout<A>>], + ) -> Range<usize> { + let start_index = self + .entries + .iter() + .zip(expectations) + .position(|(e, expect)| { + e.expected.is_none() || !e.expected.as_ref().unwrap().is_equal(expect) + }) + .unwrap_or(expectations.len()); + for (e, expect) in self.entries[start_index..] 
+ .iter_mut() + .zip(expectations[start_index..].iter()) + { + e.expected = Some(expect.clone()); + } + for e in self.entries[expectations.len()..].iter_mut() { + e.expected = None; + } + self.make_range(start_index) + } + + pub fn assign(&mut self, index: usize, value: Arc<BindGroupLayout<A>>) -> Range<usize> { + self.entries[index].assigned = Some(value); + self.make_range(index) + } + + pub fn list_active(&self) -> impl Iterator<Item = usize> + '_ { + self.entries + .iter() + .enumerate() + .filter_map(|(i, e)| if e.is_active() { Some(i) } else { None }) + } + + pub fn invalid_mask(&self) -> super::BindGroupMask { + self.entries.iter().enumerate().fold(0, |mask, (i, entry)| { + if entry.is_valid() { + mask + } else { + mask | 1u8 << i + } + }) + } + + pub fn bgl_diff(&self) -> Vec<String> { + for e in &self.entries { + if !e.is_valid() { + return e.bgl_diff(); + } + } + vec![String::from("No differences detected? (internal error)")] + } + } +} + +#[derive(Debug)] +struct LateBufferBinding { + shader_expect_size: wgt::BufferAddress, + bound_size: wgt::BufferAddress, +} + +#[derive(Debug)] +pub(super) struct EntryPayload<A: HalApi> { + pub(super) group: Option<Arc<BindGroup<A>>>, + pub(super) dynamic_offsets: Vec<wgt::DynamicOffset>, + late_buffer_bindings: Vec<LateBufferBinding>, + /// Since `LateBufferBinding` may contain information about the bindings + /// not used by the pipeline, we need to know when to stop validating. 
+ pub(super) late_bindings_effective_count: usize, +} + +impl<A: HalApi> Default for EntryPayload<A> { + fn default() -> Self { + Self { + group: None, + dynamic_offsets: Default::default(), + late_buffer_bindings: Default::default(), + late_bindings_effective_count: Default::default(), + } + } +} + +impl<A: HalApi> EntryPayload<A> { + fn reset(&mut self) { + self.group = None; + self.dynamic_offsets.clear(); + self.late_buffer_bindings.clear(); + self.late_bindings_effective_count = 0; + } +} + +#[derive(Debug, Default)] +pub(super) struct Binder<A: HalApi> { + pub(super) pipeline_layout: Option<Arc<PipelineLayout<A>>>, + manager: compat::BoundBindGroupLayouts<A>, + payloads: [EntryPayload<A>; hal::MAX_BIND_GROUPS], +} + +impl<A: HalApi> Binder<A> { + pub(super) fn new() -> Self { + Self { + pipeline_layout: None, + manager: compat::BoundBindGroupLayouts::new(), + payloads: Default::default(), + } + } + pub(super) fn reset(&mut self) { + self.pipeline_layout = None; + self.manager = compat::BoundBindGroupLayouts::new(); + for payload in self.payloads.iter_mut() { + payload.reset(); + } + } + + pub(super) fn change_pipeline_layout<'a>( + &'a mut self, + new: &Arc<PipelineLayout<A>>, + late_sized_buffer_groups: &[LateSizedBufferGroup], + ) -> (usize, &'a [EntryPayload<A>]) { + let old_id_opt = self.pipeline_layout.replace(new.clone()); + + let mut bind_range = self.manager.update_expectations(&new.bind_group_layouts); + + // Update the buffer binding sizes that are required by shaders. 
+ for (payload, late_group) in self.payloads.iter_mut().zip(late_sized_buffer_groups) { + payload.late_bindings_effective_count = late_group.shader_sizes.len(); + for (late_binding, &shader_expect_size) in payload + .late_buffer_bindings + .iter_mut() + .zip(late_group.shader_sizes.iter()) + { + late_binding.shader_expect_size = shader_expect_size; + } + if late_group.shader_sizes.len() > payload.late_buffer_bindings.len() { + for &shader_expect_size in + late_group.shader_sizes[payload.late_buffer_bindings.len()..].iter() + { + payload.late_buffer_bindings.push(LateBufferBinding { + shader_expect_size, + bound_size: 0, + }); + } + } + } + + if let Some(old) = old_id_opt { + // root constants are the base compatibility property + if old.push_constant_ranges != new.push_constant_ranges { + bind_range.start = 0; + } + } + + (bind_range.start, &self.payloads[bind_range]) + } + + pub(super) fn assign_group<'a>( + &'a mut self, + index: usize, + bind_group: &Arc<BindGroup<A>>, + offsets: &[wgt::DynamicOffset], + ) -> &'a [EntryPayload<A>] { + let bind_group_id = bind_group.as_info().id(); + log::trace!("\tBinding [{}] = group {:?}", index, bind_group_id); + debug_assert_eq!(A::VARIANT, bind_group_id.backend()); + + let payload = &mut self.payloads[index]; + payload.group = Some(bind_group.clone()); + payload.dynamic_offsets.clear(); + payload.dynamic_offsets.extend_from_slice(offsets); + + // Fill out the actual binding sizes for buffers, + // whose layout doesn't specify `min_binding_size`. 
+ for (late_binding, late_size) in payload + .late_buffer_bindings + .iter_mut() + .zip(bind_group.late_buffer_binding_sizes.iter()) + { + late_binding.bound_size = late_size.get(); + } + if bind_group.late_buffer_binding_sizes.len() > payload.late_buffer_bindings.len() { + for late_size in + bind_group.late_buffer_binding_sizes[payload.late_buffer_bindings.len()..].iter() + { + payload.late_buffer_bindings.push(LateBufferBinding { + shader_expect_size: 0, + bound_size: late_size.get(), + }); + } + } + + let bind_range = self.manager.assign(index, bind_group.layout.clone()); + &self.payloads[bind_range] + } + + pub(super) fn list_active(&self) -> impl Iterator<Item = BindGroupId> + '_ { + let payloads = &self.payloads; + self.manager + .list_active() + .map(move |index| payloads[index].group.as_ref().unwrap().as_info().id()) + } + + pub(super) fn invalid_mask(&self) -> BindGroupMask { + self.manager.invalid_mask() + } + + pub(super) fn bgl_diff(&self) -> Vec<String> { + self.manager.bgl_diff() + } + + /// Scan active buffer bindings corresponding to layouts without `min_binding_size` specified. + pub(super) fn check_late_buffer_bindings( + &self, + ) -> Result<(), LateMinBufferBindingSizeMismatch> { + for group_index in self.manager.list_active() { + let payload = &self.payloads[group_index]; + for (compact_index, late_binding) in payload.late_buffer_bindings + [..payload.late_bindings_effective_count] + .iter() + .enumerate() + { + if late_binding.bound_size < late_binding.shader_expect_size { + return Err(LateMinBufferBindingSizeMismatch { + group_index: group_index as u32, + compact_index, + shader_size: late_binding.shader_expect_size, + bound_size: late_binding.bound_size, + }); + } + } + } + Ok(()) + } +} + +struct PushConstantChange { + stages: wgt::ShaderStages, + offset: u32, + enable: bool, +} + +/// Break up possibly overlapping push constant ranges into a set of +/// non-overlapping ranges which contain all the stage flags of the +/// original ranges. 
This allows us to zero out (or write any value) +/// to every possible value. +pub fn compute_nonoverlapping_ranges( + ranges: &[wgt::PushConstantRange], +) -> ArrayVec<wgt::PushConstantRange, { SHADER_STAGE_COUNT * 2 }> { + if ranges.is_empty() { + return ArrayVec::new(); + } + debug_assert!(ranges.len() <= SHADER_STAGE_COUNT); + + let mut breaks: ArrayVec<PushConstantChange, { SHADER_STAGE_COUNT * 2 }> = ArrayVec::new(); + for range in ranges { + breaks.push(PushConstantChange { + stages: range.stages, + offset: range.range.start, + enable: true, + }); + breaks.push(PushConstantChange { + stages: range.stages, + offset: range.range.end, + enable: false, + }); + } + breaks.sort_unstable_by_key(|change| change.offset); + + let mut output_ranges = ArrayVec::new(); + let mut position = 0_u32; + let mut stages = wgt::ShaderStages::NONE; + + for bk in breaks { + if bk.offset - position > 0 && !stages.is_empty() { + output_ranges.push(wgt::PushConstantRange { + stages, + range: position..bk.offset, + }) + } + position = bk.offset; + stages.set(bk.stages, bk.enable); + } + + output_ranges +} diff --git a/third_party/rust/wgpu-core/src/command/bundle.rs b/third_party/rust/wgpu-core/src/command/bundle.rs new file mode 100644 index 0000000000..9d80c62f85 --- /dev/null +++ b/third_party/rust/wgpu-core/src/command/bundle.rs @@ -0,0 +1,1722 @@ +/*! Render Bundles + +A render bundle is a prerecorded sequence of commands that can be replayed on a +command encoder with a single call. A single bundle can replayed any number of +times, on different encoders. Constructing a render bundle lets `wgpu` validate +and analyze its commands up front, so that replaying a bundle can be more +efficient than simply re-recording its commands each time. + +Not all commands are available in bundles; for example, a render bundle may not +contain a [`RenderCommand::SetViewport`] command. + +Most of `wgpu`'s backend graphics APIs have something like bundles. 
For example, +Vulkan calls them "secondary command buffers", and Metal calls them "indirect +command buffers". Although we plan to take advantage of these platform features +at some point in the future, for now `wgpu`'s implementation of render bundles +does not use them: at the hal level, `wgpu` render bundles just replay the +commands. + +## Render Bundle Isolation + +One important property of render bundles is that the draw calls in a render +bundle depend solely on the pipeline and state established within the render +bundle itself. A draw call in a bundle will never use a vertex buffer, say, that +was set in the `RenderPass` before executing the bundle. We call this property +'isolation', in that a render bundle is somewhat isolated from the passes that +use it. + +Render passes are also isolated from the effects of bundles. After executing a +render bundle, a render pass's pipeline, bind groups, and vertex and index +buffers are are unset, so the bundle cannot affect later draw calls in the pass. + +A render pass is not fully isolated from a bundle's effects on push constant +values. Draw calls following a bundle's execution will see whatever values the +bundle writes to push constant storage. Setting a pipeline initializes any push +constant storage it could access to zero, and this initialization may also be +visible after bundle execution. + +## Render Bundle Lifecycle + +To create a render bundle: + +1) Create a [`RenderBundleEncoder`] by calling + [`Global::device_create_render_bundle_encoder`][Gdcrbe]. + +2) Record commands in the `RenderBundleEncoder` using functions from the + [`bundle_ffi`] module. + +3) Call [`Global::render_bundle_encoder_finish`][Grbef], which analyzes and cleans up + the command stream and returns a `RenderBundleId`. + +4) Then, any number of times, call [`wgpu_render_pass_execute_bundles`][wrpeb] to + execute the bundle as part of some render pass. 
+ +## Implementation + +The most complex part of render bundles is the "finish" step, mostly implemented +in [`RenderBundleEncoder::finish`]. This consumes the commands stored in the +encoder's [`BasePass`], while validating everything, tracking the state, +dropping redundant or unnecessary commands, and presenting the results as a new +[`RenderBundle`]. It doesn't actually execute any commands. + +This step also enforces the 'isolation' property mentioned above: every draw +call is checked to ensure that the resources it uses on were established since +the last time the pipeline was set. This means the bundle can be executed +verbatim without any state tracking. + +### Execution + +When the bundle is used in an actual render pass, `RenderBundle::execute` is +called. It goes through the commands and issues them into the native command +buffer. Thanks to isolation, it doesn't track any bind group invalidations or +index format changes. + +[Gdcrbe]: crate::global::Global::device_create_render_bundle_encoder +[Grbef]: crate::global::Global::render_bundle_encoder_finish +[wrpeb]: crate::command::render_ffi::wgpu_render_pass_execute_bundles +!*/ + +#![allow(clippy::reversed_empty_ranges)] + +#[cfg(feature = "trace")] +use crate::device::trace; +use crate::{ + binding_model::{buffer_binding_type_alignment, BindGroup, BindGroupLayout, PipelineLayout}, + command::{ + BasePass, BindGroupStateChange, ColorAttachmentError, DrawError, MapPassErr, + PassErrorScope, RenderCommand, RenderCommandError, StateChange, + }, + conv, + device::{ + AttachmentData, Device, DeviceError, MissingDownlevelFlags, + RenderPassCompatibilityCheckType, RenderPassContext, SHADER_STAGE_COUNT, + }, + error::{ErrorFormatter, PrettyError}, + hal_api::HalApi, + hub::Hub, + id, + init_tracker::{BufferInitTrackerAction, MemoryInitKind, TextureInitTrackerAction}, + pipeline::{PipelineFlags, RenderPipeline, VertexStep}, + resource::{Resource, ResourceInfo, ResourceType}, + resource_log, + 
track::RenderBundleScope, + validation::check_buffer_usage, + Label, LabelHelpers, +}; +use arrayvec::ArrayVec; + +use std::{borrow::Cow, mem, num::NonZeroU32, ops::Range, sync::Arc}; +use thiserror::Error; + +use hal::CommandEncoder as _; + +/// https://gpuweb.github.io/gpuweb/#dom-gpurendercommandsmixin-draw +fn validate_draw( + vertex: &[Option<VertexState>], + step: &[VertexStep], + first_vertex: u32, + vertex_count: u32, + first_instance: u32, + instance_count: u32, +) -> Result<(), DrawError> { + let vertices_end = first_vertex as u64 + vertex_count as u64; + let instances_end = first_instance as u64 + instance_count as u64; + + for (idx, (vbs, step)) in vertex.iter().zip(step).enumerate() { + let Some(vbs) = vbs else { + continue; + }; + + let stride_count = match step.mode { + wgt::VertexStepMode::Vertex => vertices_end, + wgt::VertexStepMode::Instance => instances_end, + }; + + if stride_count == 0 { + continue; + } + + let offset = (stride_count - 1) * step.stride + step.last_stride; + let limit = vbs.range.end - vbs.range.start; + if offset > limit { + return Err(DrawError::VertexOutOfBounds { + step_mode: step.mode, + offset, + limit, + slot: idx as u32, + }); + } + } + + Ok(()) +} + +// See https://gpuweb.github.io/gpuweb/#dom-gpurendercommandsmixin-drawindexed +fn validate_indexed_draw( + vertex: &[Option<VertexState>], + step: &[VertexStep], + index_state: &IndexState, + first_index: u32, + index_count: u32, + first_instance: u32, + instance_count: u32, +) -> Result<(), DrawError> { + let last_index = first_index as u64 + index_count as u64; + let index_limit = index_state.limit(); + if last_index <= index_limit { + return Err(DrawError::IndexBeyondLimit { + last_index, + index_limit, + }); + } + + let stride_count = first_instance as u64 + instance_count as u64; + for (idx, (vbs, step)) in vertex.iter().zip(step).enumerate() { + let Some(vbs) = vbs else { + continue; + }; + + if stride_count == 0 || step.mode != wgt::VertexStepMode::Instance { + 
continue; + } + + let offset = (stride_count - 1) * step.stride + step.last_stride; + let limit = vbs.range.end - vbs.range.start; + if offset > limit { + return Err(DrawError::VertexOutOfBounds { + step_mode: step.mode, + offset, + limit, + slot: idx as u32, + }); + } + } + + Ok(()) +} + +/// Describes a [`RenderBundleEncoder`]. +#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct RenderBundleEncoderDescriptor<'a> { + /// Debug label of the render bundle encoder. + /// + /// This will show up in graphics debuggers for easy identification. + pub label: Label<'a>, + /// The formats of the color attachments that this render bundle is capable + /// to rendering to. + /// + /// This must match the formats of the color attachments in the + /// renderpass this render bundle is executed in. + pub color_formats: Cow<'a, [Option<wgt::TextureFormat>]>, + /// Information about the depth attachment that this render bundle is + /// capable to rendering to. + /// + /// The format must match the format of the depth attachments in the + /// renderpass this render bundle is executed in. + pub depth_stencil: Option<wgt::RenderBundleDepthStencil>, + /// Sample count this render bundle is capable of rendering to. + /// + /// This must match the pipelines and the renderpasses it is used in. + pub sample_count: u32, + /// If this render bundle will rendering to multiple array layers in the + /// attachments at the same time. + pub multiview: Option<NonZeroU32>, +} + +#[derive(Debug)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +pub struct RenderBundleEncoder { + base: BasePass<RenderCommand>, + parent_id: id::DeviceId, + pub(crate) context: RenderPassContext, + pub(crate) is_depth_read_only: bool, + pub(crate) is_stencil_read_only: bool, + + // Resource binding dedupe state. 
+ #[cfg_attr(feature = "serde", serde(skip))] + current_bind_groups: BindGroupStateChange, + #[cfg_attr(feature = "serde", serde(skip))] + current_pipeline: StateChange<id::RenderPipelineId>, +} + +impl RenderBundleEncoder { + pub fn new( + desc: &RenderBundleEncoderDescriptor, + parent_id: id::DeviceId, + base: Option<BasePass<RenderCommand>>, + ) -> Result<Self, CreateRenderBundleError> { + let (is_depth_read_only, is_stencil_read_only) = match desc.depth_stencil { + Some(ds) => { + let aspects = hal::FormatAspects::from(ds.format); + ( + !aspects.contains(hal::FormatAspects::DEPTH) || ds.depth_read_only, + !aspects.contains(hal::FormatAspects::STENCIL) || ds.stencil_read_only, + ) + } + // There's no depth/stencil attachment, so these values just don't + // matter. Choose the most accommodating value, to simplify + // validation. + None => (true, true), + }; + + //TODO: validate that attachment formats are renderable, + // have expected aspects, support multisampling. + Ok(Self { + base: base.unwrap_or_else(|| BasePass::new(&desc.label)), + parent_id, + context: RenderPassContext { + attachments: AttachmentData { + colors: if desc.color_formats.len() > hal::MAX_COLOR_ATTACHMENTS { + return Err(CreateRenderBundleError::ColorAttachment( + ColorAttachmentError::TooMany { + given: desc.color_formats.len(), + limit: hal::MAX_COLOR_ATTACHMENTS, + }, + )); + } else { + desc.color_formats.iter().cloned().collect() + }, + resolves: ArrayVec::new(), + depth_stencil: desc.depth_stencil.map(|ds| ds.format), + }, + sample_count: { + let sc = desc.sample_count; + if sc == 0 || sc > 32 || !conv::is_power_of_two_u32(sc) { + return Err(CreateRenderBundleError::InvalidSampleCount(sc)); + } + sc + }, + multiview: desc.multiview, + }, + + is_depth_read_only, + is_stencil_read_only, + current_bind_groups: BindGroupStateChange::new(), + current_pipeline: StateChange::new(), + }) + } + + pub fn dummy(parent_id: id::DeviceId) -> Self { + Self { + base: BasePass::new(&None), + 
parent_id, + context: RenderPassContext { + attachments: AttachmentData { + colors: ArrayVec::new(), + resolves: ArrayVec::new(), + depth_stencil: None, + }, + sample_count: 0, + multiview: None, + }, + is_depth_read_only: false, + is_stencil_read_only: false, + + current_bind_groups: BindGroupStateChange::new(), + current_pipeline: StateChange::new(), + } + } + + #[cfg(feature = "trace")] + pub(crate) fn to_base_pass(&self) -> BasePass<RenderCommand> { + BasePass::from_ref(self.base.as_ref()) + } + + pub fn parent(&self) -> id::DeviceId { + self.parent_id + } + + /// Convert this encoder's commands into a [`RenderBundle`]. + /// + /// We want executing a [`RenderBundle`] to be quick, so we take + /// this opportunity to clean up the [`RenderBundleEncoder`]'s + /// command stream and gather metadata about it that will help + /// keep [`ExecuteBundle`] simple and fast. We remove redundant + /// commands (along with their side data), note resource usage, + /// and accumulate buffer and texture initialization actions. 
+ /// + /// [`ExecuteBundle`]: RenderCommand::ExecuteBundle + pub(crate) fn finish<A: HalApi>( + self, + desc: &RenderBundleDescriptor, + device: &Arc<Device<A>>, + hub: &Hub<A>, + ) -> Result<RenderBundle<A>, RenderBundleError> { + let bind_group_guard = hub.bind_groups.read(); + let pipeline_guard = hub.render_pipelines.read(); + let query_set_guard = hub.query_sets.read(); + let buffer_guard = hub.buffers.read(); + let texture_guard = hub.textures.read(); + + let mut state = State { + trackers: RenderBundleScope::new( + &*buffer_guard, + &*texture_guard, + &*bind_group_guard, + &*pipeline_guard, + &*query_set_guard, + ), + pipeline: None, + bind: (0..hal::MAX_BIND_GROUPS).map(|_| None).collect(), + vertex: (0..hal::MAX_VERTEX_BUFFERS).map(|_| None).collect(), + index: None, + flat_dynamic_offsets: Vec::new(), + }; + let mut commands = Vec::new(); + let mut buffer_memory_init_actions = Vec::new(); + let mut texture_memory_init_actions = Vec::new(); + + let base = self.base.as_ref(); + let mut next_dynamic_offset = 0; + + for &command in base.commands { + match command { + RenderCommand::SetBindGroup { + index, + num_dynamic_offsets, + bind_group_id, + } => { + let scope = PassErrorScope::SetBindGroup(bind_group_id); + + let bind_group = state + .trackers + .bind_groups + .write() + .add_single(&*bind_group_guard, bind_group_id) + .ok_or(RenderCommandError::InvalidBindGroup(bind_group_id)) + .map_pass_err(scope)?; + self.check_valid_to_use(bind_group.device.info.id()) + .map_pass_err(scope)?; + + let max_bind_groups = device.limits.max_bind_groups; + if index >= max_bind_groups { + return Err(RenderCommandError::BindGroupIndexOutOfRange { + index, + max: max_bind_groups, + }) + .map_pass_err(scope); + } + + // Identify the next `num_dynamic_offsets` entries from `base.dynamic_offsets`. 
+ let num_dynamic_offsets = num_dynamic_offsets; + let offsets_range = + next_dynamic_offset..next_dynamic_offset + num_dynamic_offsets; + next_dynamic_offset = offsets_range.end; + let offsets = &base.dynamic_offsets[offsets_range.clone()]; + + if bind_group.dynamic_binding_info.len() != offsets.len() { + return Err(RenderCommandError::InvalidDynamicOffsetCount { + actual: offsets.len(), + expected: bind_group.dynamic_binding_info.len(), + }) + .map_pass_err(scope); + } + + // Check for misaligned offsets. + for (offset, info) in offsets + .iter() + .map(|offset| *offset as wgt::BufferAddress) + .zip(bind_group.dynamic_binding_info.iter()) + { + let (alignment, limit_name) = + buffer_binding_type_alignment(&device.limits, info.binding_type); + if offset % alignment as u64 != 0 { + return Err(RenderCommandError::UnalignedBufferOffset( + offset, limit_name, alignment, + )) + .map_pass_err(scope); + } + } + + buffer_memory_init_actions.extend_from_slice(&bind_group.used_buffer_ranges); + texture_memory_init_actions.extend_from_slice(&bind_group.used_texture_ranges); + + state.set_bind_group(index, bind_group_guard.get(bind_group_id).as_ref().unwrap(), &bind_group.layout, offsets_range); + unsafe { + state + .trackers + .merge_bind_group(&bind_group.used) + .map_pass_err(scope)? + }; + //Note: stateless trackers are not merged: the lifetime reference + // is held to the bind group itself. 
+ } + RenderCommand::SetPipeline(pipeline_id) => { + let scope = PassErrorScope::SetPipelineRender(pipeline_id); + + let pipeline = state + .trackers + .render_pipelines + .write() + .add_single(&*pipeline_guard, pipeline_id) + .ok_or(RenderCommandError::InvalidPipeline(pipeline_id)) + .map_pass_err(scope)?; + self.check_valid_to_use(pipeline.device.info.id()) + .map_pass_err(scope)?; + + self.context + .check_compatible(&pipeline.pass_context, RenderPassCompatibilityCheckType::RenderPipeline) + .map_err(RenderCommandError::IncompatiblePipelineTargets) + .map_pass_err(scope)?; + + if (pipeline.flags.contains(PipelineFlags::WRITES_DEPTH) + && self.is_depth_read_only) + || (pipeline.flags.contains(PipelineFlags::WRITES_STENCIL) + && self.is_stencil_read_only) + { + return Err(RenderCommandError::IncompatiblePipelineRods) + .map_pass_err(scope); + } + + let pipeline_state = PipelineState::new(pipeline); + + commands.push(command); + + // If this pipeline uses push constants, zero out their values. 
+ if let Some(iter) = pipeline_state.zero_push_constants() { + commands.extend(iter) + } + + state.invalidate_bind_groups(&pipeline_state, &pipeline.layout); + state.pipeline = Some(pipeline_state); + } + RenderCommand::SetIndexBuffer { + buffer_id, + index_format, + offset, + size, + } => { + let scope = PassErrorScope::SetIndexBuffer(buffer_id); + let buffer = state + .trackers + .buffers + .write() + .merge_single(&*buffer_guard, buffer_id, hal::BufferUses::INDEX) + .map_pass_err(scope)?; + self.check_valid_to_use(buffer.device.info.id()) + .map_pass_err(scope)?; + check_buffer_usage(buffer.usage, wgt::BufferUsages::INDEX) + .map_pass_err(scope)?; + + let end = match size { + Some(s) => offset + s.get(), + None => buffer.size, + }; + buffer_memory_init_actions.extend(buffer.initialization_status.read().create_action( + buffer, + offset..end, + MemoryInitKind::NeedsInitializedMemory, + )); + state.set_index_buffer(buffer_id, index_format, offset..end); + } + RenderCommand::SetVertexBuffer { + slot, + buffer_id, + offset, + size, + } => { + let scope = PassErrorScope::SetVertexBuffer(buffer_id); + + let max_vertex_buffers = device.limits.max_vertex_buffers; + if slot >= max_vertex_buffers { + return Err(RenderCommandError::VertexBufferIndexOutOfRange { + index: slot, + max: max_vertex_buffers, + }) + .map_pass_err(scope); + } + + let buffer = state + .trackers + .buffers + .write() + .merge_single(&*buffer_guard, buffer_id, hal::BufferUses::VERTEX) + .map_pass_err(scope)?; + self.check_valid_to_use(buffer.device.info.id()) + .map_pass_err(scope)?; + check_buffer_usage(buffer.usage, wgt::BufferUsages::VERTEX) + .map_pass_err(scope)?; + + let end = match size { + Some(s) => offset + s.get(), + None => buffer.size, + }; + buffer_memory_init_actions.extend(buffer.initialization_status.read().create_action( + buffer, + offset..end, + MemoryInitKind::NeedsInitializedMemory, + )); + state.vertex[slot as usize] = Some(VertexState::new(buffer_id, offset..end)); + } + 
RenderCommand::SetPushConstant { + stages, + offset, + size_bytes, + values_offset: _, + } => { + let scope = PassErrorScope::SetPushConstant; + let end_offset = offset + size_bytes; + + let pipeline_state = state.pipeline(scope)?; + + pipeline_state.pipeline.layout + .validate_push_constant_ranges(stages, offset, end_offset) + .map_pass_err(scope)?; + + commands.push(command); + } + RenderCommand::Draw { + vertex_count, + instance_count, + first_vertex, + first_instance, + } => { + let scope = PassErrorScope::Draw { + indexed: false, + indirect: false, + pipeline: state.pipeline_id(), + }; + let pipeline = state.pipeline(scope)?; + let used_bind_groups = pipeline.used_bind_groups; + + validate_draw( + &state.vertex[..], + &pipeline.steps, + first_vertex, + vertex_count, + first_instance, + instance_count, + ).map_pass_err(scope)?; + + if instance_count > 0 && vertex_count > 0 { + commands.extend(state.flush_vertices()); + commands.extend(state.flush_binds(used_bind_groups, base.dynamic_offsets)); + commands.push(command); + } + } + RenderCommand::DrawIndexed { + index_count, + instance_count, + first_index, + base_vertex: _, + first_instance, + } => { + let scope = PassErrorScope::Draw { + indexed: true, + indirect: false, + pipeline: state.pipeline_id(), + }; + let pipeline = state.pipeline(scope)?; + let used_bind_groups = pipeline.used_bind_groups; + let index = match state.index { + Some(ref index) => index, + None => return Err(DrawError::MissingIndexBuffer).map_pass_err(scope), + }; + + validate_indexed_draw( + &state.vertex[..], + &pipeline.steps, + index, + first_index, + index_count, + first_instance, + instance_count, + ).map_pass_err(scope)?; + + if instance_count > 0 && index_count > 0 { + commands.extend(state.flush_index()); + commands.extend(state.flush_vertices()); + commands.extend(state.flush_binds(used_bind_groups, base.dynamic_offsets)); + commands.push(command); + } + } + RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count: None, 
+ indexed: false, + } => { + let scope = PassErrorScope::Draw { + indexed: false, + indirect: true, + pipeline: state.pipeline_id(), + }; + device + .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION) + .map_pass_err(scope)?; + + let pipeline = state.pipeline(scope)?; + let used_bind_groups = pipeline.used_bind_groups; + + let buffer = state + .trackers + .buffers + .write() + .merge_single(&*buffer_guard, buffer_id, hal::BufferUses::INDIRECT) + .map_pass_err(scope)?; + self.check_valid_to_use(buffer.device.info.id()) + .map_pass_err(scope)?; + check_buffer_usage(buffer.usage, wgt::BufferUsages::INDIRECT) + .map_pass_err(scope)?; + + buffer_memory_init_actions.extend(buffer.initialization_status.read().create_action( + buffer, + offset..(offset + mem::size_of::<wgt::DrawIndirectArgs>() as u64), + MemoryInitKind::NeedsInitializedMemory, + )); + + commands.extend(state.flush_vertices()); + commands.extend(state.flush_binds(used_bind_groups, base.dynamic_offsets)); + commands.push(command); + } + RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count: None, + indexed: true, + } => { + let scope = PassErrorScope::Draw { + indexed: true, + indirect: true, + pipeline: state.pipeline_id(), + }; + device + .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION) + .map_pass_err(scope)?; + + let pipeline = state.pipeline(scope)?; + let used_bind_groups = pipeline.used_bind_groups; + + let buffer = state + .trackers + .buffers + .write() + .merge_single(&*buffer_guard, buffer_id, hal::BufferUses::INDIRECT) + .map_pass_err(scope)?; + self.check_valid_to_use(buffer.device.info.id()) + .map_pass_err(scope)?; + check_buffer_usage(buffer.usage, wgt::BufferUsages::INDIRECT) + .map_pass_err(scope)?; + + buffer_memory_init_actions.extend(buffer.initialization_status.read().create_action( + buffer, + offset..(offset + mem::size_of::<wgt::DrawIndirectArgs>() as u64), + MemoryInitKind::NeedsInitializedMemory, + )); + + let index = match state.index { + 
Some(ref mut index) => index, + None => return Err(DrawError::MissingIndexBuffer).map_pass_err(scope), + }; + + commands.extend(index.flush()); + commands.extend(state.flush_vertices()); + commands.extend(state.flush_binds(used_bind_groups, base.dynamic_offsets)); + commands.push(command); + } + RenderCommand::MultiDrawIndirect { .. } + | RenderCommand::MultiDrawIndirectCount { .. } => unimplemented!(), + RenderCommand::PushDebugGroup { color: _, len: _ } => unimplemented!(), + RenderCommand::InsertDebugMarker { color: _, len: _ } => unimplemented!(), + RenderCommand::PopDebugGroup => unimplemented!(), + RenderCommand::WriteTimestamp { .. } // Must check the TIMESTAMP_QUERY_INSIDE_PASSES feature + | RenderCommand::BeginOcclusionQuery { .. } + | RenderCommand::EndOcclusionQuery + | RenderCommand::BeginPipelineStatisticsQuery { .. } + | RenderCommand::EndPipelineStatisticsQuery => unimplemented!(), + RenderCommand::ExecuteBundle(_) + | RenderCommand::SetBlendConstant(_) + | RenderCommand::SetStencilReference(_) + | RenderCommand::SetViewport { .. 
} + | RenderCommand::SetScissor(_) => unreachable!("not supported by a render bundle"), + } + } + + Ok(RenderBundle { + base: BasePass { + label: desc.label.as_ref().map(|cow| cow.to_string()), + commands, + dynamic_offsets: state.flat_dynamic_offsets, + string_data: Vec::new(), + push_constant_data: Vec::new(), + }, + is_depth_read_only: self.is_depth_read_only, + is_stencil_read_only: self.is_stencil_read_only, + device: device.clone(), + used: state.trackers, + buffer_memory_init_actions, + texture_memory_init_actions, + context: self.context, + info: ResourceInfo::new(desc.label.borrow_or_default()), + discard_hal_labels: device + .instance_flags + .contains(wgt::InstanceFlags::DISCARD_HAL_LABELS), + }) + } + + fn check_valid_to_use(&self, device_id: id::DeviceId) -> Result<(), RenderBundleErrorInner> { + if device_id != self.parent_id { + return Err(RenderBundleErrorInner::NotValidToUse); + } + + Ok(()) + } + + pub fn set_index_buffer( + &mut self, + buffer_id: id::BufferId, + index_format: wgt::IndexFormat, + offset: wgt::BufferAddress, + size: Option<wgt::BufferSize>, + ) { + self.base.commands.push(RenderCommand::SetIndexBuffer { + buffer_id, + index_format, + offset, + size, + }); + } +} + +/// Error type returned from `RenderBundleEncoder::new` if the sample count is invalid. +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum CreateRenderBundleError { + #[error(transparent)] + ColorAttachment(#[from] ColorAttachmentError), + #[error("Invalid number of samples {0}")] + InvalidSampleCount(u32), +} + +/// Error type returned from `RenderBundleEncoder::new` if the sample count is invalid. 
+#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum ExecutionError { + #[error("Buffer {0:?} is destroyed")] + DestroyedBuffer(id::BufferId), + #[error("BindGroup {0:?} is invalid")] + InvalidBindGroup(id::BindGroupId), + #[error("Using {0} in a render bundle is not implemented")] + Unimplemented(&'static str), +} +impl PrettyError for ExecutionError { + fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { + fmt.error(self); + match *self { + Self::DestroyedBuffer(id) => { + fmt.buffer_label(&id); + } + Self::InvalidBindGroup(id) => { + fmt.bind_group_label(&id); + } + Self::Unimplemented(_reason) => {} + }; + } +} + +pub type RenderBundleDescriptor<'a> = wgt::RenderBundleDescriptor<Label<'a>>; + +//Note: here, `RenderBundle` is just wrapping a raw stream of render commands. +// The plan is to back it by an actual Vulkan secondary buffer, D3D12 Bundle, +// or Metal indirect command buffer. +#[derive(Debug)] +pub struct RenderBundle<A: HalApi> { + // Normalized command stream. It can be executed verbatim, + // without re-binding anything on the pipeline change. 
+ base: BasePass<RenderCommand>, + pub(super) is_depth_read_only: bool, + pub(super) is_stencil_read_only: bool, + pub(crate) device: Arc<Device<A>>, + pub(crate) used: RenderBundleScope<A>, + pub(super) buffer_memory_init_actions: Vec<BufferInitTrackerAction<A>>, + pub(super) texture_memory_init_actions: Vec<TextureInitTrackerAction<A>>, + pub(super) context: RenderPassContext, + pub(crate) info: ResourceInfo<RenderBundle<A>>, + discard_hal_labels: bool, +} + +impl<A: HalApi> Drop for RenderBundle<A> { + fn drop(&mut self) { + resource_log!("Destroy raw RenderBundle {:?}", self.info.label()); + + #[cfg(feature = "trace")] + if let Some(t) = self.device.trace.lock().as_mut() { + t.add(trace::Action::DestroyRenderBundle(self.info.id())); + } + } +} + +#[cfg(send_sync)] +unsafe impl<A: HalApi> Send for RenderBundle<A> {} +#[cfg(send_sync)] +unsafe impl<A: HalApi> Sync for RenderBundle<A> {} + +impl<A: HalApi> RenderBundle<A> { + /// Actually encode the contents into a native command buffer. + /// + /// This is partially duplicating the logic of `command_encoder_run_render_pass`. + /// However the point of this function is to be lighter, since we already had + /// a chance to go through the commands in `render_bundle_encoder_finish`. + /// + /// Note that the function isn't expected to fail, generally. + /// All the validation has already been done by this point. + /// The only failure condition is if some of the used buffers are destroyed. 
+ pub(super) unsafe fn execute(&self, raw: &mut A::CommandEncoder) -> Result<(), ExecutionError> { + let trackers = &self.used; + let mut offsets = self.base.dynamic_offsets.as_slice(); + let mut pipeline_layout = None::<Arc<PipelineLayout<A>>>; + if !self.discard_hal_labels { + if let Some(ref label) = self.base.label { + unsafe { raw.begin_debug_marker(label) }; + } + } + + let snatch_guard = self.device.snatchable_lock.read(); + + for command in self.base.commands.iter() { + match *command { + RenderCommand::SetBindGroup { + index, + num_dynamic_offsets, + bind_group_id, + } => { + let bind_groups = trackers.bind_groups.read(); + let bind_group = bind_groups.get(bind_group_id).unwrap(); + let raw_bg = bind_group + .raw(&snatch_guard) + .ok_or(ExecutionError::InvalidBindGroup(bind_group_id))?; + unsafe { + raw.set_bind_group( + pipeline_layout.as_ref().unwrap().raw(), + index, + raw_bg, + &offsets[..num_dynamic_offsets], + ) + }; + offsets = &offsets[num_dynamic_offsets..]; + } + RenderCommand::SetPipeline(pipeline_id) => { + let render_pipelines = trackers.render_pipelines.read(); + let pipeline = render_pipelines.get(pipeline_id).unwrap(); + unsafe { raw.set_render_pipeline(pipeline.raw()) }; + + pipeline_layout = Some(pipeline.layout.clone()); + } + RenderCommand::SetIndexBuffer { + buffer_id, + index_format, + offset, + size, + } => { + let buffers = trackers.buffers.read(); + let buffer: &A::Buffer = buffers + .get(buffer_id) + .ok_or(ExecutionError::DestroyedBuffer(buffer_id))? + .raw(&snatch_guard) + .ok_or(ExecutionError::DestroyedBuffer(buffer_id))?; + let bb = hal::BufferBinding { + buffer, + offset, + size, + }; + unsafe { raw.set_index_buffer(bb, index_format) }; + } + RenderCommand::SetVertexBuffer { + slot, + buffer_id, + offset, + size, + } => { + let buffers = trackers.buffers.read(); + let buffer = buffers + .get(buffer_id) + .ok_or(ExecutionError::DestroyedBuffer(buffer_id))? 
+ .raw(&snatch_guard) + .ok_or(ExecutionError::DestroyedBuffer(buffer_id))?; + let bb = hal::BufferBinding { + buffer, + offset, + size, + }; + unsafe { raw.set_vertex_buffer(slot, bb) }; + } + RenderCommand::SetPushConstant { + stages, + offset, + size_bytes, + values_offset, + } => { + let pipeline_layout = pipeline_layout.as_ref().unwrap(); + + if let Some(values_offset) = values_offset { + let values_end_offset = + (values_offset + size_bytes / wgt::PUSH_CONSTANT_ALIGNMENT) as usize; + let data_slice = &self.base.push_constant_data + [(values_offset as usize)..values_end_offset]; + + unsafe { + raw.set_push_constants( + pipeline_layout.raw(), + stages, + offset, + data_slice, + ) + } + } else { + super::push_constant_clear( + offset, + size_bytes, + |clear_offset, clear_data| { + unsafe { + raw.set_push_constants( + pipeline_layout.raw(), + stages, + clear_offset, + clear_data, + ) + }; + }, + ); + } + } + RenderCommand::Draw { + vertex_count, + instance_count, + first_vertex, + first_instance, + } => { + unsafe { raw.draw(first_vertex, vertex_count, first_instance, instance_count) }; + } + RenderCommand::DrawIndexed { + index_count, + instance_count, + first_index, + base_vertex, + first_instance, + } => { + unsafe { + raw.draw_indexed( + first_index, + index_count, + base_vertex, + first_instance, + instance_count, + ) + }; + } + RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count: None, + indexed: false, + } => { + let buffers = trackers.buffers.read(); + let buffer = buffers + .get(buffer_id) + .ok_or(ExecutionError::DestroyedBuffer(buffer_id))? + .raw(&snatch_guard) + .ok_or(ExecutionError::DestroyedBuffer(buffer_id))?; + unsafe { raw.draw_indirect(buffer, offset, 1) }; + } + RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count: None, + indexed: true, + } => { + let buffers = trackers.buffers.read(); + let buffer = buffers + .get(buffer_id) + .ok_or(ExecutionError::DestroyedBuffer(buffer_id))? 
+ .raw(&snatch_guard) + .ok_or(ExecutionError::DestroyedBuffer(buffer_id))?; + unsafe { raw.draw_indexed_indirect(buffer, offset, 1) }; + } + RenderCommand::MultiDrawIndirect { .. } + | RenderCommand::MultiDrawIndirectCount { .. } => { + return Err(ExecutionError::Unimplemented("multi-draw-indirect")) + } + RenderCommand::PushDebugGroup { .. } + | RenderCommand::InsertDebugMarker { .. } + | RenderCommand::PopDebugGroup => { + return Err(ExecutionError::Unimplemented("debug-markers")) + } + RenderCommand::WriteTimestamp { .. } + | RenderCommand::BeginOcclusionQuery { .. } + | RenderCommand::EndOcclusionQuery + | RenderCommand::BeginPipelineStatisticsQuery { .. } + | RenderCommand::EndPipelineStatisticsQuery => { + return Err(ExecutionError::Unimplemented("queries")) + } + RenderCommand::ExecuteBundle(_) + | RenderCommand::SetBlendConstant(_) + | RenderCommand::SetStencilReference(_) + | RenderCommand::SetViewport { .. } + | RenderCommand::SetScissor(_) => unreachable!(), + } + } + + if !self.discard_hal_labels { + if let Some(_) = self.base.label { + unsafe { raw.end_debug_marker() }; + } + } + + Ok(()) + } +} + +impl<A: HalApi> Resource for RenderBundle<A> { + const TYPE: ResourceType = "RenderBundle"; + + type Marker = crate::id::markers::RenderBundle; + + fn as_info(&self) -> &ResourceInfo<Self> { + &self.info + } + + fn as_info_mut(&mut self) -> &mut ResourceInfo<Self> { + &mut self.info + } +} + +/// A render bundle's current index buffer state. +/// +/// [`RenderBundleEncoder::finish`] records the currently set index buffer here, +/// and calls [`State::flush_index`] before any indexed draw command to produce +/// a `SetIndexBuffer` command if one is necessary. +#[derive(Debug)] +struct IndexState { + buffer: id::BufferId, + format: wgt::IndexFormat, + range: Range<wgt::BufferAddress>, + is_dirty: bool, +} + +impl IndexState { + /// Return the number of entries in the current index buffer. + /// + /// Panic if no index buffer has been set. 
+ fn limit(&self) -> u64 { + let bytes_per_index = match self.format { + wgt::IndexFormat::Uint16 => 2, + wgt::IndexFormat::Uint32 => 4, + }; + + (self.range.end - self.range.start) / bytes_per_index + } + + /// Generate a `SetIndexBuffer` command to prepare for an indexed draw + /// command, if needed. + fn flush(&mut self) -> Option<RenderCommand> { + if self.is_dirty { + self.is_dirty = false; + Some(RenderCommand::SetIndexBuffer { + buffer_id: self.buffer, + index_format: self.format, + offset: self.range.start, + size: wgt::BufferSize::new(self.range.end - self.range.start), + }) + } else { + None + } + } +} + +/// The state of a single vertex buffer slot during render bundle encoding. +/// +/// [`RenderBundleEncoder::finish`] uses this to drop redundant +/// `SetVertexBuffer` commands from the final [`RenderBundle`]. It +/// records one vertex buffer slot's state changes here, and then +/// calls this type's [`flush`] method just before any draw command to +/// produce a `SetVertexBuffer` commands if one is necessary. +/// +/// [`flush`]: IndexState::flush +#[derive(Debug)] +struct VertexState { + buffer: id::BufferId, + range: Range<wgt::BufferAddress>, + is_dirty: bool, +} + +impl VertexState { + fn new(buffer: id::BufferId, range: Range<wgt::BufferAddress>) -> Self { + Self { + buffer, + range, + is_dirty: true, + } + } + + /// Generate a `SetVertexBuffer` command for this slot, if necessary. + /// + /// `slot` is the index of the vertex buffer slot that `self` tracks. + fn flush(&mut self, slot: u32) -> Option<RenderCommand> { + if self.is_dirty { + self.is_dirty = false; + Some(RenderCommand::SetVertexBuffer { + slot, + buffer_id: self.buffer, + offset: self.range.start, + size: wgt::BufferSize::new(self.range.end - self.range.start), + }) + } else { + None + } + } +} + +/// A bind group that has been set at a particular index during render bundle encoding. +#[derive(Debug)] +struct BindState<A: HalApi> { + /// The id of the bind group set at this index. 
+ bind_group: Arc<BindGroup<A>>, + + /// The layout of `group`. + layout: Arc<BindGroupLayout<A>>, + + /// The range of dynamic offsets for this bind group, in the original + /// command stream's `BassPass::dynamic_offsets` array. + dynamic_offsets: Range<usize>, + + /// True if this index's contents have been changed since the last time we + /// generated a `SetBindGroup` command. + is_dirty: bool, +} + +/// The bundle's current pipeline, and some cached information needed for validation. +struct PipelineState<A: HalApi> { + /// The pipeline + pipeline: Arc<RenderPipeline<A>>, + + /// How this pipeline's vertex shader traverses each vertex buffer, indexed + /// by vertex buffer slot number. + steps: Vec<VertexStep>, + + /// Ranges of push constants this pipeline uses, copied from the pipeline + /// layout. + push_constant_ranges: ArrayVec<wgt::PushConstantRange, { SHADER_STAGE_COUNT }>, + + /// The number of bind groups this pipeline uses. + used_bind_groups: usize, +} + +impl<A: HalApi> PipelineState<A> { + fn new(pipeline: &Arc<RenderPipeline<A>>) -> Self { + Self { + pipeline: pipeline.clone(), + steps: pipeline.vertex_steps.to_vec(), + push_constant_ranges: pipeline + .layout + .push_constant_ranges + .iter() + .cloned() + .collect(), + used_bind_groups: pipeline.layout.bind_group_layouts.len(), + } + } + + /// Return a sequence of commands to zero the push constant ranges this + /// pipeline uses. If no initialization is necessary, return `None`. 
+ fn zero_push_constants(&self) -> Option<impl Iterator<Item = RenderCommand>> { + if !self.push_constant_ranges.is_empty() { + let nonoverlapping_ranges = + super::bind::compute_nonoverlapping_ranges(&self.push_constant_ranges); + + Some( + nonoverlapping_ranges + .into_iter() + .map(|range| RenderCommand::SetPushConstant { + stages: range.stages, + offset: range.range.start, + size_bytes: range.range.end - range.range.start, + values_offset: None, // write zeros + }), + ) + } else { + None + } + } +} + +/// State for analyzing and cleaning up bundle command streams. +/// +/// To minimize state updates, [`RenderBundleEncoder::finish`] +/// actually just applies commands like [`SetBindGroup`] and +/// [`SetIndexBuffer`] to the simulated state stored here, and then +/// calls the `flush_foo` methods before draw calls to produce the +/// update commands we actually need. +/// +/// [`SetBindGroup`]: RenderCommand::SetBindGroup +/// [`SetIndexBuffer`]: RenderCommand::SetIndexBuffer +struct State<A: HalApi> { + /// Resources used by this bundle. This will become [`RenderBundle::used`]. + trackers: RenderBundleScope<A>, + + /// The currently set pipeline, if any. + pipeline: Option<PipelineState<A>>, + + /// The bind group set at each index, if any. + bind: ArrayVec<Option<BindState<A>>, { hal::MAX_BIND_GROUPS }>, + + /// The state of each vertex buffer slot. + vertex: ArrayVec<Option<VertexState>, { hal::MAX_VERTEX_BUFFERS }>, + + /// The current index buffer, if one has been set. We flush this state + /// before indexed draw commands. + index: Option<IndexState>, + + /// Dynamic offset values used by the cleaned-up command sequence. + /// + /// This becomes the final [`RenderBundle`]'s [`BasePass`]'s + /// [`dynamic_offsets`] list. + /// + /// [`dynamic_offsets`]: BasePass::dynamic_offsets + flat_dynamic_offsets: Vec<wgt::DynamicOffset>, +} + +impl<A: HalApi> State<A> { + /// Return the id of the current pipeline, if any. 
+ fn pipeline_id(&self) -> Option<id::RenderPipelineId> { + self.pipeline.as_ref().map(|p| p.pipeline.as_info().id()) + } + + /// Return the current pipeline state. Return an error if none is set. + fn pipeline(&self, scope: PassErrorScope) -> Result<&PipelineState<A>, RenderBundleError> { + self.pipeline + .as_ref() + .ok_or(DrawError::MissingPipeline) + .map_pass_err(scope) + } + + /// Mark all non-empty bind group table entries from `index` onwards as dirty. + fn invalidate_bind_group_from(&mut self, index: usize) { + for contents in self.bind[index..].iter_mut().flatten() { + contents.is_dirty = true; + } + } + + fn set_bind_group( + &mut self, + slot: u32, + bind_group: &Arc<BindGroup<A>>, + layout: &Arc<BindGroupLayout<A>>, + dynamic_offsets: Range<usize>, + ) { + // If this call wouldn't actually change this index's state, we can + // return early. (If there are dynamic offsets, the range will always + // be different.) + if dynamic_offsets.is_empty() { + if let Some(ref contents) = self.bind[slot as usize] { + if contents.bind_group.is_equal(bind_group) { + return; + } + } + } + + // Record the index's new state. + self.bind[slot as usize] = Some(BindState { + bind_group: bind_group.clone(), + layout: layout.clone(), + dynamic_offsets, + is_dirty: true, + }); + + // Once we've changed the bind group at a particular index, all + // subsequent indices need to be rewritten. + self.invalidate_bind_group_from(slot as usize + 1); + } + + /// Determine which bind group slots need to be re-set after a pipeline change. + /// + /// Given that we are switching from the current pipeline state to `new`, + /// whose layout is `layout`, mark all the bind group slots that we need to + /// emit new `SetBindGroup` commands for as dirty. + /// + /// According to `wgpu_hal`'s rules: + /// + /// - If the layout of any bind group slot changes, then that slot and + /// all following slots must have their bind groups re-established. 
+ /// + /// - Changing the push constant ranges at all requires re-establishing + /// all bind groups. + fn invalidate_bind_groups(&mut self, new: &PipelineState<A>, layout: &PipelineLayout<A>) { + match self.pipeline { + None => { + // Establishing entirely new pipeline state. + self.invalidate_bind_group_from(0); + } + Some(ref old) => { + if old.pipeline.is_equal(&new.pipeline) { + // Everything is derived from the pipeline, so if the id has + // not changed, there's no need to consider anything else. + return; + } + + // Any push constant change invalidates all groups. + if old.push_constant_ranges != new.push_constant_ranges { + self.invalidate_bind_group_from(0); + } else { + let first_changed = self.bind.iter().zip(&layout.bind_group_layouts).position( + |(entry, layout)| match *entry { + Some(ref contents) => !contents.layout.is_equal(layout), + None => false, + }, + ); + if let Some(slot) = first_changed { + self.invalidate_bind_group_from(slot); + } + } + } + } + } + + /// Set the bundle's current index buffer and its associated parameters. + fn set_index_buffer( + &mut self, + buffer: id::BufferId, + format: wgt::IndexFormat, + range: Range<wgt::BufferAddress>, + ) { + match self.index { + Some(ref current) + if current.buffer == buffer + && current.format == format + && current.range == range => + { + return + } + _ => (), + } + + self.index = Some(IndexState { + buffer, + format, + range, + is_dirty: true, + }); + } + + /// Generate a `SetIndexBuffer` command to prepare for an indexed draw + /// command, if needed. + fn flush_index(&mut self) -> Option<RenderCommand> { + self.index.as_mut().and_then(|index| index.flush()) + } + + fn flush_vertices(&mut self) -> impl Iterator<Item = RenderCommand> + '_ { + self.vertex + .iter_mut() + .enumerate() + .flat_map(|(i, vs)| vs.as_mut().and_then(|vs| vs.flush(i as u32))) + } + + /// Generate `SetBindGroup` commands for any bind groups that need to be updated. 
+ fn flush_binds( + &mut self, + used_bind_groups: usize, + dynamic_offsets: &[wgt::DynamicOffset], + ) -> impl Iterator<Item = RenderCommand> + '_ { + // Append each dirty bind group's dynamic offsets to `flat_dynamic_offsets`. + for contents in self.bind[..used_bind_groups].iter().flatten() { + if contents.is_dirty { + self.flat_dynamic_offsets + .extend_from_slice(&dynamic_offsets[contents.dynamic_offsets.clone()]); + } + } + + // Then, generate `SetBindGroup` commands to update the dirty bind + // groups. After this, all bind groups are clean. + self.bind[..used_bind_groups] + .iter_mut() + .enumerate() + .flat_map(|(i, entry)| { + if let Some(ref mut contents) = *entry { + if contents.is_dirty { + contents.is_dirty = false; + let offsets = &contents.dynamic_offsets; + return Some(RenderCommand::SetBindGroup { + index: i.try_into().unwrap(), + bind_group_id: contents.bind_group.as_info().id(), + num_dynamic_offsets: offsets.end - offsets.start, + }); + } + } + None + }) + } +} + +/// Error encountered when finishing recording a render bundle. +#[derive(Clone, Debug, Error)] +pub(super) enum RenderBundleErrorInner { + #[error("Resource is not valid to use with this render bundle because the resource and the bundle come from different devices")] + NotValidToUse, + #[error(transparent)] + Device(#[from] DeviceError), + #[error(transparent)] + RenderCommand(RenderCommandError), + #[error(transparent)] + Draw(#[from] DrawError), + #[error(transparent)] + MissingDownlevelFlags(#[from] MissingDownlevelFlags), +} + +impl<T> From<T> for RenderBundleErrorInner +where + T: Into<RenderCommandError>, +{ + fn from(t: T) -> Self { + Self::RenderCommand(t.into()) + } +} + +/// Error encountered when finishing recording a render bundle. 
+#[derive(Clone, Debug, Error)] +#[error("{scope}")] +pub struct RenderBundleError { + pub scope: PassErrorScope, + #[source] + inner: RenderBundleErrorInner, +} + +impl RenderBundleError { + pub(crate) const INVALID_DEVICE: Self = RenderBundleError { + scope: PassErrorScope::Bundle, + inner: RenderBundleErrorInner::Device(DeviceError::Invalid), + }; +} +impl PrettyError for RenderBundleError { + fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { + // This error is wrapper for the inner error, + // but the scope has useful labels + fmt.error(self); + self.scope.fmt_pretty(fmt); + } +} + +impl<T, E> MapPassErr<T, RenderBundleError> for Result<T, E> +where + E: Into<RenderBundleErrorInner>, +{ + fn map_pass_err(self, scope: PassErrorScope) -> Result<T, RenderBundleError> { + self.map_err(|inner| RenderBundleError { + scope, + inner: inner.into(), + }) + } +} + +pub mod bundle_ffi { + use super::{RenderBundleEncoder, RenderCommand}; + use crate::{id, RawString}; + use std::{convert::TryInto, slice}; + use wgt::{BufferAddress, BufferSize, DynamicOffset, IndexFormat}; + + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given pointer is + /// valid for `offset_length` elements. 
+ #[no_mangle] + pub unsafe extern "C" fn wgpu_render_bundle_set_bind_group( + bundle: &mut RenderBundleEncoder, + index: u32, + bind_group_id: id::BindGroupId, + offsets: *const DynamicOffset, + offset_length: usize, + ) { + let redundant = unsafe { + bundle.current_bind_groups.set_and_check_redundant( + bind_group_id, + index, + &mut bundle.base.dynamic_offsets, + offsets, + offset_length, + ) + }; + + if redundant { + return; + } + + bundle.base.commands.push(RenderCommand::SetBindGroup { + index, + num_dynamic_offsets: offset_length, + bind_group_id, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_bundle_set_pipeline( + bundle: &mut RenderBundleEncoder, + pipeline_id: id::RenderPipelineId, + ) { + if bundle.current_pipeline.set_and_check_redundant(pipeline_id) { + return; + } + + bundle + .base + .commands + .push(RenderCommand::SetPipeline(pipeline_id)); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_bundle_set_vertex_buffer( + bundle: &mut RenderBundleEncoder, + slot: u32, + buffer_id: id::BufferId, + offset: BufferAddress, + size: Option<BufferSize>, + ) { + bundle.base.commands.push(RenderCommand::SetVertexBuffer { + slot, + buffer_id, + offset, + size, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_bundle_set_index_buffer( + encoder: &mut RenderBundleEncoder, + buffer: id::BufferId, + index_format: IndexFormat, + offset: BufferAddress, + size: Option<BufferSize>, + ) { + encoder.set_index_buffer(buffer, index_format, offset, size); + } + + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given pointer is + /// valid for `data` elements. + #[no_mangle] + pub unsafe extern "C" fn wgpu_render_bundle_set_push_constants( + pass: &mut RenderBundleEncoder, + stages: wgt::ShaderStages, + offset: u32, + size_bytes: u32, + data: *const u8, + ) { + assert_eq!( + offset & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), + 0, + "Push constant offset must be aligned to 4 bytes." 
+ ); + assert_eq!( + size_bytes & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), + 0, + "Push constant size must be aligned to 4 bytes." + ); + let data_slice = unsafe { slice::from_raw_parts(data, size_bytes as usize) }; + let value_offset = pass.base.push_constant_data.len().try_into().expect( + "Ran out of push constant space. Don't set 4gb of push constants per RenderBundle.", + ); + + pass.base.push_constant_data.extend( + data_slice + .chunks_exact(wgt::PUSH_CONSTANT_ALIGNMENT as usize) + .map(|arr| u32::from_ne_bytes([arr[0], arr[1], arr[2], arr[3]])), + ); + + pass.base.commands.push(RenderCommand::SetPushConstant { + stages, + offset, + size_bytes, + values_offset: Some(value_offset), + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_bundle_draw( + bundle: &mut RenderBundleEncoder, + vertex_count: u32, + instance_count: u32, + first_vertex: u32, + first_instance: u32, + ) { + bundle.base.commands.push(RenderCommand::Draw { + vertex_count, + instance_count, + first_vertex, + first_instance, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_bundle_draw_indexed( + bundle: &mut RenderBundleEncoder, + index_count: u32, + instance_count: u32, + first_index: u32, + base_vertex: i32, + first_instance: u32, + ) { + bundle.base.commands.push(RenderCommand::DrawIndexed { + index_count, + instance_count, + first_index, + base_vertex, + first_instance, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_bundle_draw_indirect( + bundle: &mut RenderBundleEncoder, + buffer_id: id::BufferId, + offset: BufferAddress, + ) { + bundle.base.commands.push(RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count: None, + indexed: false, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_bundle_draw_indexed_indirect( + bundle: &mut RenderBundleEncoder, + buffer_id: id::BufferId, + offset: BufferAddress, + ) { + bundle.base.commands.push(RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count: None, + indexed: true, + }); + } + + /// # Safety 
+ /// + /// This function is unsafe as there is no guarantee that the given `label` + /// is a valid null-terminated string. + #[no_mangle] + pub unsafe extern "C" fn wgpu_render_bundle_push_debug_group( + _bundle: &mut RenderBundleEncoder, + _label: RawString, + ) { + //TODO + } + + #[no_mangle] + pub extern "C" fn wgpu_render_bundle_pop_debug_group(_bundle: &mut RenderBundleEncoder) { + //TODO + } + + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given `label` + /// is a valid null-terminated string. + #[no_mangle] + pub unsafe extern "C" fn wgpu_render_bundle_insert_debug_marker( + _bundle: &mut RenderBundleEncoder, + _label: RawString, + ) { + //TODO + } +} diff --git a/third_party/rust/wgpu-core/src/command/clear.rs b/third_party/rust/wgpu-core/src/command/clear.rs new file mode 100644 index 0000000000..2569fea1a4 --- /dev/null +++ b/third_party/rust/wgpu-core/src/command/clear.rs @@ -0,0 +1,493 @@ +use std::{ops::Range, sync::Arc}; + +#[cfg(feature = "trace")] +use crate::device::trace::Command as TraceCommand; +use crate::{ + api_log, + command::CommandBuffer, + device::DeviceError, + get_lowest_common_denom, + global::Global, + hal_api::HalApi, + id::{BufferId, CommandEncoderId, DeviceId, TextureId}, + init_tracker::{MemoryInitKind, TextureInitRange}, + resource::{Resource, Texture, TextureClearMode}, + track::{TextureSelector, TextureTracker}, +}; + +use hal::CommandEncoder as _; +use thiserror::Error; +use wgt::{math::align_to, BufferAddress, BufferUsages, ImageSubresourceRange, TextureAspect}; + +/// Error encountered while attempting a clear. 
+#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum ClearError { + #[error("To use clear_texture the CLEAR_TEXTURE feature needs to be enabled")] + MissingClearTextureFeature, + #[error("Command encoder {0:?} is invalid")] + InvalidCommandEncoder(CommandEncoderId), + #[error("Device {0:?} is invalid")] + InvalidDevice(DeviceId), + #[error("Buffer {0:?} is invalid or destroyed")] + InvalidBuffer(BufferId), + #[error("Texture {0:?} is invalid or destroyed")] + InvalidTexture(TextureId), + #[error("Texture {0:?} can not be cleared")] + NoValidTextureClearMode(TextureId), + #[error("Buffer clear size {0:?} is not a multiple of `COPY_BUFFER_ALIGNMENT`")] + UnalignedFillSize(BufferAddress), + #[error("Buffer offset {0:?} is not a multiple of `COPY_BUFFER_ALIGNMENT`")] + UnalignedBufferOffset(BufferAddress), + #[error("Clear of {start_offset}..{end_offset} would end up overrunning the bounds of the buffer of size {buffer_size}")] + BufferOverrun { + start_offset: BufferAddress, + end_offset: BufferAddress, + buffer_size: BufferAddress, + }, + #[error("Destination buffer is missing the `COPY_DST` usage flag")] + MissingCopyDstUsageFlag(Option<BufferId>, Option<TextureId>), + #[error("Texture lacks the aspects that were specified in the image subresource range. Texture with format {texture_format:?}, specified was {subresource_range_aspects:?}")] + MissingTextureAspect { + texture_format: wgt::TextureFormat, + subresource_range_aspects: TextureAspect, + }, + #[error("Image subresource level range is outside of the texture's level range. texture range is {texture_level_range:?}, \ +whereas subesource range specified start {subresource_base_mip_level} and count {subresource_mip_level_count:?}")] + InvalidTextureLevelRange { + texture_level_range: Range<u32>, + subresource_base_mip_level: u32, + subresource_mip_level_count: Option<u32>, + }, + #[error("Image subresource layer range is outside of the texture's layer range. 
texture range is {texture_layer_range:?}, \ +whereas subesource range specified start {subresource_base_array_layer} and count {subresource_array_layer_count:?}")] + InvalidTextureLayerRange { + texture_layer_range: Range<u32>, + subresource_base_array_layer: u32, + subresource_array_layer_count: Option<u32>, + }, + #[error(transparent)] + Device(#[from] DeviceError), +} + +impl Global { + pub fn command_encoder_clear_buffer<A: HalApi>( + &self, + command_encoder_id: CommandEncoderId, + dst: BufferId, + offset: BufferAddress, + size: Option<BufferAddress>, + ) -> Result<(), ClearError> { + profiling::scope!("CommandEncoder::clear_buffer"); + api_log!("CommandEncoder::clear_buffer {dst:?}"); + + let hub = A::hub(self); + + let cmd_buf = CommandBuffer::get_encoder(hub, command_encoder_id) + .map_err(|_| ClearError::InvalidCommandEncoder(command_encoder_id))?; + let mut cmd_buf_data = cmd_buf.data.lock(); + let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf_data.commands { + list.push(TraceCommand::ClearBuffer { dst, offset, size }); + } + + let (dst_buffer, dst_pending) = { + let buffer_guard = hub.buffers.read(); + let dst_buffer = buffer_guard + .get(dst) + .map_err(|_| ClearError::InvalidBuffer(dst))?; + cmd_buf_data + .trackers + .buffers + .set_single(dst_buffer, hal::BufferUses::COPY_DST) + .ok_or(ClearError::InvalidBuffer(dst))? + }; + let snatch_guard = dst_buffer.device.snatchable_lock.read(); + let dst_raw = dst_buffer + .raw + .get(&snatch_guard) + .ok_or(ClearError::InvalidBuffer(dst))?; + if !dst_buffer.usage.contains(BufferUsages::COPY_DST) { + return Err(ClearError::MissingCopyDstUsageFlag(Some(dst), None)); + } + + // Check if offset & size are valid. 
+ if offset % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(ClearError::UnalignedBufferOffset(offset)); + } + if let Some(size) = size { + if size % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(ClearError::UnalignedFillSize(size)); + } + let destination_end_offset = offset + size; + if destination_end_offset > dst_buffer.size { + return Err(ClearError::BufferOverrun { + start_offset: offset, + end_offset: destination_end_offset, + buffer_size: dst_buffer.size, + }); + } + } + + let end = match size { + Some(size) => offset + size, + None => dst_buffer.size, + }; + if offset == end { + log::trace!("Ignoring fill_buffer of size 0"); + return Ok(()); + } + + // Mark dest as initialized. + cmd_buf_data.buffer_memory_init_actions.extend( + dst_buffer.initialization_status.read().create_action( + &dst_buffer, + offset..end, + MemoryInitKind::ImplicitlyInitialized, + ), + ); + + // actual hal barrier & operation + let dst_barrier = dst_pending.map(|pending| pending.into_hal(&dst_buffer, &snatch_guard)); + let cmd_buf_raw = cmd_buf_data.encoder.open()?; + unsafe { + cmd_buf_raw.transition_buffers(dst_barrier.into_iter()); + cmd_buf_raw.clear_buffer(dst_raw, offset..end); + } + Ok(()) + } + + pub fn command_encoder_clear_texture<A: HalApi>( + &self, + command_encoder_id: CommandEncoderId, + dst: TextureId, + subresource_range: &ImageSubresourceRange, + ) -> Result<(), ClearError> { + profiling::scope!("CommandEncoder::clear_texture"); + api_log!("CommandEncoder::clear_texture {dst:?}"); + + let hub = A::hub(self); + + let cmd_buf = CommandBuffer::get_encoder(hub, command_encoder_id) + .map_err(|_| ClearError::InvalidCommandEncoder(command_encoder_id))?; + let mut cmd_buf_data = cmd_buf.data.lock(); + let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf_data.commands { + list.push(TraceCommand::ClearTexture { + dst, + subresource_range: *subresource_range, + }); + } + + if !cmd_buf.support_clear_texture { + 
return Err(ClearError::MissingClearTextureFeature); + } + + let dst_texture = hub + .textures + .get(dst) + .map_err(|_| ClearError::InvalidTexture(dst))?; + + // Check if subresource aspects are valid. + let clear_aspects = + hal::FormatAspects::new(dst_texture.desc.format, subresource_range.aspect); + if clear_aspects.is_empty() { + return Err(ClearError::MissingTextureAspect { + texture_format: dst_texture.desc.format, + subresource_range_aspects: subresource_range.aspect, + }); + }; + + // Check if subresource level range is valid + let subresource_mip_range = subresource_range.mip_range(dst_texture.full_range.mips.end); + if dst_texture.full_range.mips.start > subresource_mip_range.start + || dst_texture.full_range.mips.end < subresource_mip_range.end + { + return Err(ClearError::InvalidTextureLevelRange { + texture_level_range: dst_texture.full_range.mips.clone(), + subresource_base_mip_level: subresource_range.base_mip_level, + subresource_mip_level_count: subresource_range.mip_level_count, + }); + } + // Check if subresource layer range is valid + let subresource_layer_range = + subresource_range.layer_range(dst_texture.full_range.layers.end); + if dst_texture.full_range.layers.start > subresource_layer_range.start + || dst_texture.full_range.layers.end < subresource_layer_range.end + { + return Err(ClearError::InvalidTextureLayerRange { + texture_layer_range: dst_texture.full_range.layers.clone(), + subresource_base_array_layer: subresource_range.base_array_layer, + subresource_array_layer_count: subresource_range.array_layer_count, + }); + } + + let device = &cmd_buf.device; + if !device.is_valid() { + return Err(ClearError::InvalidDevice(cmd_buf.device.as_info().id())); + } + let (encoder, tracker) = cmd_buf_data.open_encoder_and_tracker()?; + + clear_texture( + &dst_texture, + TextureInitRange { + mip_range: subresource_mip_range, + layer_range: subresource_layer_range, + }, + encoder, + &mut tracker.textures, + &device.alignments, + 
device.zero_buffer.as_ref().unwrap(), + ) + } +} + +pub(crate) fn clear_texture<A: HalApi>( + dst_texture: &Arc<Texture<A>>, + range: TextureInitRange, + encoder: &mut A::CommandEncoder, + texture_tracker: &mut TextureTracker<A>, + alignments: &hal::Alignments, + zero_buffer: &A::Buffer, +) -> Result<(), ClearError> { + let snatch_guard = dst_texture.device.snatchable_lock.read(); + let dst_raw = dst_texture + .raw(&snatch_guard) + .ok_or_else(|| ClearError::InvalidTexture(dst_texture.as_info().id()))?; + + // Issue the right barrier. + let clear_usage = match *dst_texture.clear_mode.read() { + TextureClearMode::BufferCopy => hal::TextureUses::COPY_DST, + TextureClearMode::RenderPass { + is_color: false, .. + } => hal::TextureUses::DEPTH_STENCIL_WRITE, + TextureClearMode::Surface { .. } | TextureClearMode::RenderPass { is_color: true, .. } => { + hal::TextureUses::COLOR_TARGET + } + TextureClearMode::None => { + return Err(ClearError::NoValidTextureClearMode( + dst_texture.as_info().id(), + )); + } + }; + + let selector = TextureSelector { + mips: range.mip_range.clone(), + layers: range.layer_range.clone(), + }; + + // If we're in a texture-init usecase, we know that the texture is already + // tracked since whatever caused the init requirement, will have caused the + // usage tracker to be aware of the texture. Meaning, that it is safe to + // call call change_replace_tracked if the life_guard is already gone (i.e. + // the user no longer holds on to this texture). + // + // On the other hand, when coming via command_encoder_clear_texture, the + // life_guard is still there since in order to call it a texture object is + // needed. + // + // We could in theory distinguish these two scenarios in the internal + // clear_texture api in order to remove this check and call the cheaper + // change_replace_tracked whenever possible. 
+ let dst_barrier = texture_tracker + .set_single(dst_texture, selector, clear_usage) + .unwrap() + .map(|pending| pending.into_hal(dst_raw)); + unsafe { + encoder.transition_textures(dst_barrier.into_iter()); + } + + // Record actual clearing + match *dst_texture.clear_mode.read() { + TextureClearMode::BufferCopy => clear_texture_via_buffer_copies::<A>( + &dst_texture.desc, + alignments, + zero_buffer, + range, + encoder, + dst_raw, + ), + TextureClearMode::Surface { .. } => { + clear_texture_via_render_passes(dst_texture, range, true, encoder)? + } + TextureClearMode::RenderPass { is_color, .. } => { + clear_texture_via_render_passes(dst_texture, range, is_color, encoder)? + } + TextureClearMode::None => { + return Err(ClearError::NoValidTextureClearMode( + dst_texture.as_info().id(), + )); + } + } + Ok(()) +} + +fn clear_texture_via_buffer_copies<A: HalApi>( + texture_desc: &wgt::TextureDescriptor<(), Vec<wgt::TextureFormat>>, + alignments: &hal::Alignments, + zero_buffer: &A::Buffer, // Buffer of size device::ZERO_BUFFER_SIZE + range: TextureInitRange, + encoder: &mut A::CommandEncoder, + dst_raw: &A::Texture, +) { + assert!(!texture_desc.format.is_depth_stencil_format()); + + if texture_desc.format == wgt::TextureFormat::NV12 { + // TODO: Currently COPY_DST for NV12 textures is unsupported. 
+ return; + } + + // Gather list of zero_buffer copies and issue a single command then to perform them + let mut zero_buffer_copy_regions = Vec::new(); + let buffer_copy_pitch = alignments.buffer_copy_pitch.get() as u32; + let (block_width, block_height) = texture_desc.format.block_dimensions(); + let block_size = texture_desc.format.block_copy_size(None).unwrap(); + + let bytes_per_row_alignment = get_lowest_common_denom(buffer_copy_pitch, block_size); + + for mip_level in range.mip_range { + let mut mip_size = texture_desc.mip_level_size(mip_level).unwrap(); + // Round to multiple of block size + mip_size.width = align_to(mip_size.width, block_width); + mip_size.height = align_to(mip_size.height, block_height); + + let bytes_per_row = align_to( + mip_size.width / block_width * block_size, + bytes_per_row_alignment, + ); + + let max_rows_per_copy = crate::device::ZERO_BUFFER_SIZE as u32 / bytes_per_row; + // round down to a multiple of rows needed by the texture format + let max_rows_per_copy = max_rows_per_copy / block_height * block_height; + assert!( + max_rows_per_copy > 0, + "Zero buffer size is too small to fill a single row \ + of a texture with format {:?} and desc {:?}", + texture_desc.format, + texture_desc.size + ); + + let z_range = 0..(if texture_desc.dimension == wgt::TextureDimension::D3 { + mip_size.depth_or_array_layers + } else { + 1 + }); + + for array_layer in range.layer_range.clone() { + // TODO: Only doing one layer at a time for volume textures right now. + for z in z_range.clone() { + // May need multiple copies for each subresource! However, we + // assume that we never need to split a row. 
+ let mut num_rows_left = mip_size.height; + while num_rows_left > 0 { + let num_rows = num_rows_left.min(max_rows_per_copy); + + zero_buffer_copy_regions.push(hal::BufferTextureCopy { + buffer_layout: wgt::ImageDataLayout { + offset: 0, + bytes_per_row: Some(bytes_per_row), + rows_per_image: None, + }, + texture_base: hal::TextureCopyBase { + mip_level, + array_layer, + origin: wgt::Origin3d { + x: 0, // Always full rows + y: mip_size.height - num_rows_left, + z, + }, + aspect: hal::FormatAspects::COLOR, + }, + size: hal::CopyExtent { + width: mip_size.width, // full row + height: num_rows, + depth: 1, // Only single slice of volume texture at a time right now + }, + }); + + num_rows_left -= num_rows; + } + } + } + } + + unsafe { + encoder.copy_buffer_to_texture(zero_buffer, dst_raw, zero_buffer_copy_regions.into_iter()); + } +} + +fn clear_texture_via_render_passes<A: HalApi>( + dst_texture: &Texture<A>, + range: TextureInitRange, + is_color: bool, + encoder: &mut A::CommandEncoder, +) -> Result<(), ClearError> { + assert_eq!(dst_texture.desc.dimension, wgt::TextureDimension::D2); + + let extent_base = wgt::Extent3d { + width: dst_texture.desc.size.width, + height: dst_texture.desc.size.height, + depth_or_array_layers: 1, // Only one layer is cleared at a time. 
+ }; + let clear_mode = &dst_texture.clear_mode.read(); + + for mip_level in range.mip_range { + let extent = extent_base.mip_level_size(mip_level, dst_texture.desc.dimension); + for depth_or_layer in range.layer_range.clone() { + let color_attachments_tmp; + let (color_attachments, depth_stencil_attachment) = if is_color { + color_attachments_tmp = [Some(hal::ColorAttachment { + target: hal::Attachment { + view: Texture::get_clear_view( + clear_mode, + &dst_texture.desc, + mip_level, + depth_or_layer, + ), + usage: hal::TextureUses::COLOR_TARGET, + }, + resolve_target: None, + ops: hal::AttachmentOps::STORE, + clear_value: wgt::Color::TRANSPARENT, + })]; + (&color_attachments_tmp[..], None) + } else { + ( + &[][..], + Some(hal::DepthStencilAttachment { + target: hal::Attachment { + view: Texture::get_clear_view( + clear_mode, + &dst_texture.desc, + mip_level, + depth_or_layer, + ), + usage: hal::TextureUses::DEPTH_STENCIL_WRITE, + }, + depth_ops: hal::AttachmentOps::STORE, + stencil_ops: hal::AttachmentOps::STORE, + clear_value: (0.0, 0), + }), + ) + }; + unsafe { + encoder.begin_render_pass(&hal::RenderPassDescriptor { + label: Some("(wgpu internal) clear_texture clear pass"), + extent, + sample_count: dst_texture.desc.sample_count, + color_attachments, + depth_stencil_attachment, + multiview: None, + timestamp_writes: None, + occlusion_query_set: None, + }); + encoder.end_render_pass(); + } + } + } + Ok(()) +} diff --git a/third_party/rust/wgpu-core/src/command/compute.rs b/third_party/rust/wgpu-core/src/command/compute.rs new file mode 100644 index 0000000000..804186a01e --- /dev/null +++ b/third_party/rust/wgpu-core/src/command/compute.rs @@ -0,0 +1,1074 @@ +use crate::device::DeviceError; +use crate::resource::Resource; +use crate::snatch::SnatchGuard; +use crate::{ + binding_model::{ + BindError, BindGroup, LateMinBufferBindingSizeMismatch, PushConstantUploadError, + }, + command::{ + bind::Binder, + end_pipeline_statistics_query, + 
memory_init::{fixup_discarded_surfaces, SurfacesInDiscardState}, + BasePass, BasePassRef, BindGroupStateChange, CommandBuffer, CommandEncoderError, + CommandEncoderStatus, MapPassErr, PassErrorScope, QueryUseError, StateChange, + }, + device::{MissingDownlevelFlags, MissingFeatures}, + error::{ErrorFormatter, PrettyError}, + global::Global, + hal_api::HalApi, + hal_label, id, + id::DeviceId, + init_tracker::MemoryInitKind, + pipeline, + resource::{self}, + storage::Storage, + track::{Tracker, UsageConflict, UsageScope}, + validation::{check_buffer_usage, MissingBufferUsageError}, + Label, +}; + +use hal::CommandEncoder as _; +#[cfg(feature = "serde")] +use serde::Deserialize; +#[cfg(feature = "serde")] +use serde::Serialize; + +use thiserror::Error; + +use std::{fmt, mem, str}; + +#[doc(hidden)] +#[derive(Clone, Copy, Debug)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum ComputeCommand { + SetBindGroup { + index: u32, + num_dynamic_offsets: usize, + bind_group_id: id::BindGroupId, + }, + SetPipeline(id::ComputePipelineId), + + /// Set a range of push constants to values stored in [`BasePass::push_constant_data`]. + SetPushConstant { + /// The byte offset within the push constant storage to write to. This + /// must be a multiple of four. + offset: u32, + + /// The number of bytes to write. This must be a multiple of four. + size_bytes: u32, + + /// Index in [`BasePass::push_constant_data`] of the start of the data + /// to be written. + /// + /// Note: this is not a byte offset like `offset`. Rather, it is the + /// index of the first `u32` element in `push_constant_data` to read. 
+ values_offset: u32, + }, + + Dispatch([u32; 3]), + DispatchIndirect { + buffer_id: id::BufferId, + offset: wgt::BufferAddress, + }, + PushDebugGroup { + color: u32, + len: usize, + }, + PopDebugGroup, + InsertDebugMarker { + color: u32, + len: usize, + }, + WriteTimestamp { + query_set_id: id::QuerySetId, + query_index: u32, + }, + BeginPipelineStatisticsQuery { + query_set_id: id::QuerySetId, + query_index: u32, + }, + EndPipelineStatisticsQuery, +} + +#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +pub struct ComputePass { + base: BasePass<ComputeCommand>, + parent_id: id::CommandEncoderId, + timestamp_writes: Option<ComputePassTimestampWrites>, + + // Resource binding dedupe state. + #[cfg_attr(feature = "serde", serde(skip))] + current_bind_groups: BindGroupStateChange, + #[cfg_attr(feature = "serde", serde(skip))] + current_pipeline: StateChange<id::ComputePipelineId>, +} + +impl ComputePass { + pub fn new(parent_id: id::CommandEncoderId, desc: &ComputePassDescriptor) -> Self { + Self { + base: BasePass::new(&desc.label), + parent_id, + timestamp_writes: desc.timestamp_writes.cloned(), + + current_bind_groups: BindGroupStateChange::new(), + current_pipeline: StateChange::new(), + } + } + + pub fn parent_id(&self) -> id::CommandEncoderId { + self.parent_id + } + + #[cfg(feature = "trace")] + pub fn into_command(self) -> crate::device::trace::Command { + crate::device::trace::Command::RunComputePass { + base: self.base, + timestamp_writes: self.timestamp_writes, + } + } +} + +impl fmt::Debug for ComputePass { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "ComputePass {{ encoder_id: {:?}, data: {:?} commands and {:?} dynamic offsets }}", + self.parent_id, + self.base.commands.len(), + self.base.dynamic_offsets.len() + ) + } +} + +/// Describes the writing of timestamp values in a compute pass. 
+#[repr(C)] +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct ComputePassTimestampWrites { + /// The query set to write the timestamps to. + pub query_set: id::QuerySetId, + /// The index of the query set at which a start timestamp of this pass is written, if any. + pub beginning_of_pass_write_index: Option<u32>, + /// The index of the query set at which an end timestamp of this pass is written, if any. + pub end_of_pass_write_index: Option<u32>, +} + +#[derive(Clone, Debug, Default)] +pub struct ComputePassDescriptor<'a> { + pub label: Label<'a>, + /// Defines where and when timestamp values will be written for this pass. + pub timestamp_writes: Option<&'a ComputePassTimestampWrites>, +} + +#[derive(Clone, Debug, Error, Eq, PartialEq)] +#[non_exhaustive] +pub enum DispatchError { + #[error("Compute pipeline must be set")] + MissingPipeline, + #[error("Incompatible bind group at index {index} in the current compute pipeline")] + IncompatibleBindGroup { index: u32, diff: Vec<String> }, + #[error( + "Each current dispatch group size dimension ({current:?}) must be less or equal to {limit}" + )] + InvalidGroupSize { current: [u32; 3], limit: u32 }, + #[error(transparent)] + BindingSizeTooSmall(#[from] LateMinBufferBindingSizeMismatch), +} + +/// Error encountered when performing a compute pass. 
+#[derive(Clone, Debug, Error)] +pub enum ComputePassErrorInner { + #[error(transparent)] + Device(#[from] DeviceError), + #[error(transparent)] + Encoder(#[from] CommandEncoderError), + #[error("Bind group at index {0:?} is invalid")] + InvalidBindGroup(usize), + #[error("Device {0:?} is invalid")] + InvalidDevice(DeviceId), + #[error("Bind group index {index} is greater than the device's requested `max_bind_group` limit {max}")] + BindGroupIndexOutOfRange { index: u32, max: u32 }, + #[error("Compute pipeline {0:?} is invalid")] + InvalidPipeline(id::ComputePipelineId), + #[error("QuerySet {0:?} is invalid")] + InvalidQuerySet(id::QuerySetId), + #[error("Indirect buffer {0:?} is invalid or destroyed")] + InvalidIndirectBuffer(id::BufferId), + #[error("Indirect buffer uses bytes {offset}..{end_offset} which overruns indirect buffer of size {buffer_size}")] + IndirectBufferOverrun { + offset: u64, + end_offset: u64, + buffer_size: u64, + }, + #[error("Buffer {0:?} is invalid or destroyed")] + InvalidBuffer(id::BufferId), + #[error(transparent)] + ResourceUsageConflict(#[from] UsageConflict), + #[error(transparent)] + MissingBufferUsage(#[from] MissingBufferUsageError), + #[error("Cannot pop debug group, because number of pushed debug groups is zero")] + InvalidPopDebugGroup, + #[error(transparent)] + Dispatch(#[from] DispatchError), + #[error(transparent)] + Bind(#[from] BindError), + #[error(transparent)] + PushConstants(#[from] PushConstantUploadError), + #[error(transparent)] + QueryUse(#[from] QueryUseError), + #[error(transparent)] + MissingFeatures(#[from] MissingFeatures), + #[error(transparent)] + MissingDownlevelFlags(#[from] MissingDownlevelFlags), +} + +impl PrettyError for ComputePassErrorInner { + fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { + fmt.error(self); + match *self { + Self::InvalidPipeline(id) => { + fmt.compute_pipeline_label(&id); + } + Self::InvalidIndirectBuffer(id) => { + fmt.buffer_label(&id); + } + 
Self::Dispatch(DispatchError::IncompatibleBindGroup { ref diff, .. }) => { + for d in diff { + fmt.note(&d); + } + } + _ => {} + }; + } +} + +/// Error encountered when performing a compute pass. +#[derive(Clone, Debug, Error)] +#[error("{scope}")] +pub struct ComputePassError { + pub scope: PassErrorScope, + #[source] + inner: ComputePassErrorInner, +} +impl PrettyError for ComputePassError { + fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { + // This error is wrapper for the inner error, + // but the scope has useful labels + fmt.error(self); + self.scope.fmt_pretty(fmt); + } +} + +impl<T, E> MapPassErr<T, ComputePassError> for Result<T, E> +where + E: Into<ComputePassErrorInner>, +{ + fn map_pass_err(self, scope: PassErrorScope) -> Result<T, ComputePassError> { + self.map_err(|inner| ComputePassError { + scope, + inner: inner.into(), + }) + } +} + +struct State<A: HalApi> { + binder: Binder<A>, + pipeline: Option<id::ComputePipelineId>, + scope: UsageScope<A>, + debug_scope_depth: u32, +} + +impl<A: HalApi> State<A> { + fn is_ready(&self) -> Result<(), DispatchError> { + let bind_mask = self.binder.invalid_mask(); + if bind_mask != 0 { + //let (expected, provided) = self.binder.entries[index as usize].info(); + let index = bind_mask.trailing_zeros(); + + return Err(DispatchError::IncompatibleBindGroup { + index, + diff: self.binder.bgl_diff(), + }); + } + if self.pipeline.is_none() { + return Err(DispatchError::MissingPipeline); + } + self.binder.check_late_buffer_bindings()?; + + Ok(()) + } + + // `extra_buffer` is there to represent the indirect buffer that is also + // part of the usage scope. + fn flush_states( + &mut self, + raw_encoder: &mut A::CommandEncoder, + base_trackers: &mut Tracker<A>, + bind_group_guard: &Storage<BindGroup<A>>, + indirect_buffer: Option<id::BufferId>, + snatch_guard: &SnatchGuard, + ) -> Result<(), UsageConflict> { + for id in self.binder.list_active() { + unsafe { self.scope.merge_bind_group(&bind_group_guard[id].used)? 
}; + // Note: stateless trackers are not merged: the lifetime reference + // is held to the bind group itself. + } + + for id in self.binder.list_active() { + unsafe { + base_trackers.set_and_remove_from_usage_scope_sparse( + &mut self.scope, + &bind_group_guard[id].used, + ) + } + } + + // Add the state of the indirect buffer if it hasn't been hit before. + unsafe { + base_trackers + .buffers + .set_and_remove_from_usage_scope_sparse(&mut self.scope.buffers, indirect_buffer); + } + + log::trace!("Encoding dispatch barriers"); + + CommandBuffer::drain_barriers(raw_encoder, base_trackers, snatch_guard); + Ok(()) + } +} + +// Common routines between render/compute + +impl Global { + pub fn command_encoder_run_compute_pass<A: HalApi>( + &self, + encoder_id: id::CommandEncoderId, + pass: &ComputePass, + ) -> Result<(), ComputePassError> { + self.command_encoder_run_compute_pass_impl::<A>( + encoder_id, + pass.base.as_ref(), + pass.timestamp_writes.as_ref(), + ) + } + + #[doc(hidden)] + pub fn command_encoder_run_compute_pass_impl<A: HalApi>( + &self, + encoder_id: id::CommandEncoderId, + base: BasePassRef<ComputeCommand>, + timestamp_writes: Option<&ComputePassTimestampWrites>, + ) -> Result<(), ComputePassError> { + profiling::scope!("CommandEncoder::run_compute_pass"); + let pass_scope = PassErrorScope::Pass(encoder_id); + + let hub = A::hub(self); + + let cmd_buf = CommandBuffer::get_encoder(hub, encoder_id).map_pass_err(pass_scope)?; + let device = &cmd_buf.device; + if !device.is_valid() { + return Err(ComputePassErrorInner::InvalidDevice( + cmd_buf.device.as_info().id(), + )) + .map_pass_err(pass_scope); + } + + let mut cmd_buf_data = cmd_buf.data.lock(); + let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf_data.commands { + list.push(crate::device::trace::Command::RunComputePass { + base: BasePass::from_ref(base), + timestamp_writes: timestamp_writes.cloned(), + }); + } + + let encoder = &mut 
cmd_buf_data.encoder; + let status = &mut cmd_buf_data.status; + let tracker = &mut cmd_buf_data.trackers; + let buffer_memory_init_actions = &mut cmd_buf_data.buffer_memory_init_actions; + let texture_memory_actions = &mut cmd_buf_data.texture_memory_actions; + + // We automatically keep extending command buffers over time, and because + // we want to insert a command buffer _before_ what we're about to record, + // we need to make sure to close the previous one. + encoder.close().map_pass_err(pass_scope)?; + // will be reset to true if recording is done without errors + *status = CommandEncoderStatus::Error; + let raw = encoder.open().map_pass_err(pass_scope)?; + + let bind_group_guard = hub.bind_groups.read(); + let pipeline_guard = hub.compute_pipelines.read(); + let query_set_guard = hub.query_sets.read(); + let buffer_guard = hub.buffers.read(); + let texture_guard = hub.textures.read(); + + let mut state = State { + binder: Binder::new(), + pipeline: None, + scope: UsageScope::new(&*buffer_guard, &*texture_guard), + debug_scope_depth: 0, + }; + let mut temp_offsets = Vec::new(); + let mut dynamic_offset_count = 0; + let mut string_offset = 0; + let mut active_query = None; + + let timestamp_writes = if let Some(tw) = timestamp_writes { + let query_set: &resource::QuerySet<A> = tracker + .query_sets + .add_single(&*query_set_guard, tw.query_set) + .ok_or(ComputePassErrorInner::InvalidQuerySet(tw.query_set)) + .map_pass_err(pass_scope)?; + + // Unlike in render passes we can't delay resetting the query sets since + // there is no auxiliary pass. + let range = if let (Some(index_a), Some(index_b)) = + (tw.beginning_of_pass_write_index, tw.end_of_pass_write_index) + { + Some(index_a.min(index_b)..index_a.max(index_b) + 1) + } else { + tw.beginning_of_pass_write_index + .or(tw.end_of_pass_write_index) + .map(|i| i..i + 1) + }; + // Range should always be Some, both values being None should lead to a validation error. 
+ // But no point in erroring over that nuance here! + if let Some(range) = range { + unsafe { + raw.reset_queries(query_set.raw.as_ref().unwrap(), range); + } + } + + Some(hal::ComputePassTimestampWrites { + query_set: query_set.raw.as_ref().unwrap(), + beginning_of_pass_write_index: tw.beginning_of_pass_write_index, + end_of_pass_write_index: tw.end_of_pass_write_index, + }) + } else { + None + }; + + let snatch_guard = device.snatchable_lock.read(); + + tracker.set_size( + Some(&*buffer_guard), + Some(&*texture_guard), + None, + None, + Some(&*bind_group_guard), + Some(&*pipeline_guard), + None, + None, + Some(&*query_set_guard), + ); + + let discard_hal_labels = self + .instance + .flags + .contains(wgt::InstanceFlags::DISCARD_HAL_LABELS); + let hal_desc = hal::ComputePassDescriptor { + label: hal_label(base.label, self.instance.flags), + timestamp_writes, + }; + + unsafe { + raw.begin_compute_pass(&hal_desc); + } + + let mut intermediate_trackers = Tracker::<A>::new(); + + // Immediate texture inits required because of prior discards. Need to + // be inserted before texture reads. 
+ let mut pending_discard_init_fixups = SurfacesInDiscardState::new(); + + for command in base.commands { + match *command { + ComputeCommand::SetBindGroup { + index, + num_dynamic_offsets, + bind_group_id, + } => { + let scope = PassErrorScope::SetBindGroup(bind_group_id); + + let max_bind_groups = cmd_buf.limits.max_bind_groups; + if index >= max_bind_groups { + return Err(ComputePassErrorInner::BindGroupIndexOutOfRange { + index, + max: max_bind_groups, + }) + .map_pass_err(scope); + } + + temp_offsets.clear(); + temp_offsets.extend_from_slice( + &base.dynamic_offsets + [dynamic_offset_count..dynamic_offset_count + num_dynamic_offsets], + ); + dynamic_offset_count += num_dynamic_offsets; + + let bind_group = tracker + .bind_groups + .add_single(&*bind_group_guard, bind_group_id) + .ok_or(ComputePassErrorInner::InvalidBindGroup(index as usize)) + .map_pass_err(scope)?; + bind_group + .validate_dynamic_bindings(index, &temp_offsets, &cmd_buf.limits) + .map_pass_err(scope)?; + + buffer_memory_init_actions.extend( + bind_group.used_buffer_ranges.iter().filter_map(|action| { + action + .buffer + .initialization_status + .read() + .check_action(action) + }), + ); + + for action in bind_group.used_texture_ranges.iter() { + pending_discard_init_fixups + .extend(texture_memory_actions.register_init_action(action)); + } + + let pipeline_layout = state.binder.pipeline_layout.clone(); + let entries = + state + .binder + .assign_group(index as usize, bind_group, &temp_offsets); + if !entries.is_empty() && pipeline_layout.is_some() { + let pipeline_layout = pipeline_layout.as_ref().unwrap().raw(); + for (i, e) in entries.iter().enumerate() { + if let Some(group) = e.group.as_ref() { + let raw_bg = group + .raw(&snatch_guard) + .ok_or(ComputePassErrorInner::InvalidBindGroup(i)) + .map_pass_err(scope)?; + unsafe { + raw.set_bind_group( + pipeline_layout, + index + i as u32, + raw_bg, + &e.dynamic_offsets, + ); + } + } + } + } + } + ComputeCommand::SetPipeline(pipeline_id) => { 
+ let scope = PassErrorScope::SetPipelineCompute(pipeline_id); + + state.pipeline = Some(pipeline_id); + + let pipeline: &pipeline::ComputePipeline<A> = tracker + .compute_pipelines + .add_single(&*pipeline_guard, pipeline_id) + .ok_or(ComputePassErrorInner::InvalidPipeline(pipeline_id)) + .map_pass_err(scope)?; + + unsafe { + raw.set_compute_pipeline(pipeline.raw()); + } + + // Rebind resources + if state.binder.pipeline_layout.is_none() + || !state + .binder + .pipeline_layout + .as_ref() + .unwrap() + .is_equal(&pipeline.layout) + { + let (start_index, entries) = state.binder.change_pipeline_layout( + &pipeline.layout, + &pipeline.late_sized_buffer_groups, + ); + if !entries.is_empty() { + for (i, e) in entries.iter().enumerate() { + if let Some(group) = e.group.as_ref() { + let raw_bg = group + .raw(&snatch_guard) + .ok_or(ComputePassErrorInner::InvalidBindGroup(i)) + .map_pass_err(scope)?; + unsafe { + raw.set_bind_group( + pipeline.layout.raw(), + start_index as u32 + i as u32, + raw_bg, + &e.dynamic_offsets, + ); + } + } + } + } + + // Clear push constant ranges + let non_overlapping = super::bind::compute_nonoverlapping_ranges( + &pipeline.layout.push_constant_ranges, + ); + for range in non_overlapping { + let offset = range.range.start; + let size_bytes = range.range.end - offset; + super::push_constant_clear( + offset, + size_bytes, + |clear_offset, clear_data| unsafe { + raw.set_push_constants( + pipeline.layout.raw(), + wgt::ShaderStages::COMPUTE, + clear_offset, + clear_data, + ); + }, + ); + } + } + } + ComputeCommand::SetPushConstant { + offset, + size_bytes, + values_offset, + } => { + let scope = PassErrorScope::SetPushConstant; + + let end_offset_bytes = offset + size_bytes; + let values_end_offset = + (values_offset + size_bytes / wgt::PUSH_CONSTANT_ALIGNMENT) as usize; + let data_slice = + &base.push_constant_data[(values_offset as usize)..values_end_offset]; + + let pipeline_layout = state + .binder + .pipeline_layout + .as_ref() + //TODO: 
don't error here, lazily update the push constants + .ok_or(ComputePassErrorInner::Dispatch( + DispatchError::MissingPipeline, + )) + .map_pass_err(scope)?; + + pipeline_layout + .validate_push_constant_ranges( + wgt::ShaderStages::COMPUTE, + offset, + end_offset_bytes, + ) + .map_pass_err(scope)?; + + unsafe { + raw.set_push_constants( + pipeline_layout.raw(), + wgt::ShaderStages::COMPUTE, + offset, + data_slice, + ); + } + } + ComputeCommand::Dispatch(groups) => { + let scope = PassErrorScope::Dispatch { + indirect: false, + pipeline: state.pipeline, + }; + state.is_ready().map_pass_err(scope)?; + + state + .flush_states( + raw, + &mut intermediate_trackers, + &*bind_group_guard, + None, + &snatch_guard, + ) + .map_pass_err(scope)?; + + let groups_size_limit = cmd_buf.limits.max_compute_workgroups_per_dimension; + + if groups[0] > groups_size_limit + || groups[1] > groups_size_limit + || groups[2] > groups_size_limit + { + return Err(ComputePassErrorInner::Dispatch( + DispatchError::InvalidGroupSize { + current: groups, + limit: groups_size_limit, + }, + )) + .map_pass_err(scope); + } + + unsafe { + raw.dispatch(groups); + } + } + ComputeCommand::DispatchIndirect { buffer_id, offset } => { + let scope = PassErrorScope::Dispatch { + indirect: true, + pipeline: state.pipeline, + }; + + state.is_ready().map_pass_err(scope)?; + + device + .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION) + .map_pass_err(scope)?; + + let indirect_buffer = state + .scope + .buffers + .merge_single(&*buffer_guard, buffer_id, hal::BufferUses::INDIRECT) + .map_pass_err(scope)?; + check_buffer_usage(indirect_buffer.usage, wgt::BufferUsages::INDIRECT) + .map_pass_err(scope)?; + + let end_offset = offset + mem::size_of::<wgt::DispatchIndirectArgs>() as u64; + if end_offset > indirect_buffer.size { + return Err(ComputePassErrorInner::IndirectBufferOverrun { + offset, + end_offset, + buffer_size: indirect_buffer.size, + }) + .map_pass_err(scope); + } + + let buf_raw = 
indirect_buffer + .raw + .get(&snatch_guard) + .ok_or(ComputePassErrorInner::InvalidIndirectBuffer(buffer_id)) + .map_pass_err(scope)?; + + let stride = 3 * 4; // 3 integers, x/y/z group size + + buffer_memory_init_actions.extend( + indirect_buffer.initialization_status.read().create_action( + indirect_buffer, + offset..(offset + stride), + MemoryInitKind::NeedsInitializedMemory, + ), + ); + + state + .flush_states( + raw, + &mut intermediate_trackers, + &*bind_group_guard, + Some(buffer_id), + &snatch_guard, + ) + .map_pass_err(scope)?; + unsafe { + raw.dispatch_indirect(buf_raw, offset); + } + } + ComputeCommand::PushDebugGroup { color: _, len } => { + state.debug_scope_depth += 1; + if !discard_hal_labels { + let label = + str::from_utf8(&base.string_data[string_offset..string_offset + len]) + .unwrap(); + unsafe { + raw.begin_debug_marker(label); + } + } + string_offset += len; + } + ComputeCommand::PopDebugGroup => { + let scope = PassErrorScope::PopDebugGroup; + + if state.debug_scope_depth == 0 { + return Err(ComputePassErrorInner::InvalidPopDebugGroup) + .map_pass_err(scope); + } + state.debug_scope_depth -= 1; + if !discard_hal_labels { + unsafe { + raw.end_debug_marker(); + } + } + } + ComputeCommand::InsertDebugMarker { color: _, len } => { + if !discard_hal_labels { + let label = + str::from_utf8(&base.string_data[string_offset..string_offset + len]) + .unwrap(); + unsafe { raw.insert_debug_marker(label) } + } + string_offset += len; + } + ComputeCommand::WriteTimestamp { + query_set_id, + query_index, + } => { + let scope = PassErrorScope::WriteTimestamp; + + device + .require_features(wgt::Features::TIMESTAMP_QUERY_INSIDE_PASSES) + .map_pass_err(scope)?; + + let query_set: &resource::QuerySet<A> = tracker + .query_sets + .add_single(&*query_set_guard, query_set_id) + .ok_or(ComputePassErrorInner::InvalidQuerySet(query_set_id)) + .map_pass_err(scope)?; + + query_set + .validate_and_write_timestamp(raw, query_set_id, query_index, None) + 
.map_pass_err(scope)?; + } + ComputeCommand::BeginPipelineStatisticsQuery { + query_set_id, + query_index, + } => { + let scope = PassErrorScope::BeginPipelineStatisticsQuery; + + let query_set: &resource::QuerySet<A> = tracker + .query_sets + .add_single(&*query_set_guard, query_set_id) + .ok_or(ComputePassErrorInner::InvalidQuerySet(query_set_id)) + .map_pass_err(scope)?; + + query_set + .validate_and_begin_pipeline_statistics_query( + raw, + query_set_id, + query_index, + None, + &mut active_query, + ) + .map_pass_err(scope)?; + } + ComputeCommand::EndPipelineStatisticsQuery => { + let scope = PassErrorScope::EndPipelineStatisticsQuery; + + end_pipeline_statistics_query(raw, &*query_set_guard, &mut active_query) + .map_pass_err(scope)?; + } + } + } + + unsafe { + raw.end_compute_pass(); + } + + // We've successfully recorded the compute pass, bring the + // command buffer out of the error state. + *status = CommandEncoderStatus::Recording; + + // Stop the current command buffer. + encoder.close().map_pass_err(pass_scope)?; + + // Create a new command buffer, which we will insert _before_ the body of the compute pass. + // + // Use that buffer to insert barriers and clear discarded images. + let transit = encoder.open().map_pass_err(pass_scope)?; + fixup_discarded_surfaces( + pending_discard_init_fixups.into_iter(), + transit, + &mut tracker.textures, + device, + ); + CommandBuffer::insert_barriers_from_tracker( + transit, + tracker, + &intermediate_trackers, + &snatch_guard, + ); + // Close the command buffer, and swap it with the previous. + encoder.close_and_swap().map_pass_err(pass_scope)?; + + Ok(()) + } +} + +pub mod compute_ffi { + use super::{ComputeCommand, ComputePass}; + use crate::{id, RawString}; + use std::{convert::TryInto, ffi, slice}; + use wgt::{BufferAddress, DynamicOffset}; + + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given pointer is + /// valid for `offset_length` elements. 
+ #[no_mangle] + pub unsafe extern "C" fn wgpu_compute_pass_set_bind_group( + pass: &mut ComputePass, + index: u32, + bind_group_id: id::BindGroupId, + offsets: *const DynamicOffset, + offset_length: usize, + ) { + let redundant = unsafe { + pass.current_bind_groups.set_and_check_redundant( + bind_group_id, + index, + &mut pass.base.dynamic_offsets, + offsets, + offset_length, + ) + }; + + if redundant { + return; + } + + pass.base.commands.push(ComputeCommand::SetBindGroup { + index, + num_dynamic_offsets: offset_length, + bind_group_id, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_compute_pass_set_pipeline( + pass: &mut ComputePass, + pipeline_id: id::ComputePipelineId, + ) { + if pass.current_pipeline.set_and_check_redundant(pipeline_id) { + return; + } + + pass.base + .commands + .push(ComputeCommand::SetPipeline(pipeline_id)); + } + + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given pointer is + /// valid for `size_bytes` bytes. + #[no_mangle] + pub unsafe extern "C" fn wgpu_compute_pass_set_push_constant( + pass: &mut ComputePass, + offset: u32, + size_bytes: u32, + data: *const u8, + ) { + assert_eq!( + offset & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), + 0, + "Push constant offset must be aligned to 4 bytes." + ); + assert_eq!( + size_bytes & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), + 0, + "Push constant size must be aligned to 4 bytes." + ); + let data_slice = unsafe { slice::from_raw_parts(data, size_bytes as usize) }; + let value_offset = pass.base.push_constant_data.len().try_into().expect( + "Ran out of push constant space. 
Don't set 4gb of push constants per ComputePass.", + ); + + pass.base.push_constant_data.extend( + data_slice + .chunks_exact(wgt::PUSH_CONSTANT_ALIGNMENT as usize) + .map(|arr| u32::from_ne_bytes([arr[0], arr[1], arr[2], arr[3]])), + ); + + pass.base.commands.push(ComputeCommand::SetPushConstant { + offset, + size_bytes, + values_offset: value_offset, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_compute_pass_dispatch_workgroups( + pass: &mut ComputePass, + groups_x: u32, + groups_y: u32, + groups_z: u32, + ) { + pass.base + .commands + .push(ComputeCommand::Dispatch([groups_x, groups_y, groups_z])); + } + + #[no_mangle] + pub extern "C" fn wgpu_compute_pass_dispatch_workgroups_indirect( + pass: &mut ComputePass, + buffer_id: id::BufferId, + offset: BufferAddress, + ) { + pass.base + .commands + .push(ComputeCommand::DispatchIndirect { buffer_id, offset }); + } + + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given `label` + /// is a valid null-terminated string. + #[no_mangle] + pub unsafe extern "C" fn wgpu_compute_pass_push_debug_group( + pass: &mut ComputePass, + label: RawString, + color: u32, + ) { + let bytes = unsafe { ffi::CStr::from_ptr(label) }.to_bytes(); + pass.base.string_data.extend_from_slice(bytes); + + pass.base.commands.push(ComputeCommand::PushDebugGroup { + color, + len: bytes.len(), + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_compute_pass_pop_debug_group(pass: &mut ComputePass) { + pass.base.commands.push(ComputeCommand::PopDebugGroup); + } + + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given `label` + /// is a valid null-terminated string. 
+ #[no_mangle] + pub unsafe extern "C" fn wgpu_compute_pass_insert_debug_marker( + pass: &mut ComputePass, + label: RawString, + color: u32, + ) { + let bytes = unsafe { ffi::CStr::from_ptr(label) }.to_bytes(); + pass.base.string_data.extend_from_slice(bytes); + + pass.base.commands.push(ComputeCommand::InsertDebugMarker { + color, + len: bytes.len(), + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_compute_pass_write_timestamp( + pass: &mut ComputePass, + query_set_id: id::QuerySetId, + query_index: u32, + ) { + pass.base.commands.push(ComputeCommand::WriteTimestamp { + query_set_id, + query_index, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_compute_pass_begin_pipeline_statistics_query( + pass: &mut ComputePass, + query_set_id: id::QuerySetId, + query_index: u32, + ) { + pass.base + .commands + .push(ComputeCommand::BeginPipelineStatisticsQuery { + query_set_id, + query_index, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_compute_pass_end_pipeline_statistics_query(pass: &mut ComputePass) { + pass.base + .commands + .push(ComputeCommand::EndPipelineStatisticsQuery); + } +} diff --git a/third_party/rust/wgpu-core/src/command/draw.rs b/third_party/rust/wgpu-core/src/command/draw.rs new file mode 100644 index 0000000000..e03a78ee93 --- /dev/null +++ b/third_party/rust/wgpu-core/src/command/draw.rs @@ -0,0 +1,247 @@ +/*! Draw structures - shared between render passes and bundles. +!*/ + +use crate::{ + binding_model::{LateMinBufferBindingSizeMismatch, PushConstantUploadError}, + error::ErrorFormatter, + id, + track::UsageConflict, + validation::{MissingBufferUsageError, MissingTextureUsageError}, +}; +use wgt::{BufferAddress, BufferSize, Color, VertexStepMode}; + +use std::num::NonZeroU32; +use thiserror::Error; + +/// Error validating a draw call. 
#[derive(Clone, Debug, Error, Eq, PartialEq)]
#[non_exhaustive]
pub enum DrawError {
    /// A draw required a blend constant, but none was set on the pass.
    #[error("Blend constant needs to be set")]
    MissingBlendConstant,
    /// A draw was issued with no render pipeline bound.
    #[error("Render pipeline must be set")]
    MissingPipeline,
    /// The bound pipeline expects a vertex buffer in slot `index`, but
    /// none is set.
    #[error("Vertex buffer {index} must be set")]
    MissingVertexBuffer { index: u32 },
    /// An indexed draw was issued with no index buffer bound.
    #[error("Index buffer must be set")]
    MissingIndexBuffer,
    /// The bind group at `index` does not match the pipeline's layout;
    /// `diff` carries the per-mismatch description lines.
    #[error("Incompatible bind group at index {index} in the current render pipeline")]
    IncompatibleBindGroup { index: u32, diff: Vec<String> },
    /// A vertex index exceeds what the `Vertex` step-rate buffer in `slot`
    /// can supply.
    #[error("Vertex {last_vertex} extends beyond limit {vertex_limit} imposed by the buffer in slot {slot}. Did you bind the correct `Vertex` step-rate vertex buffer?")]
    VertexBeyondLimit {
        last_vertex: u64,
        vertex_limit: u64,
        slot: u32,
    },
    /// The bound buffer's offset is past the end of the buffer for the
    /// given step mode.
    #[error("{step_mode:?} buffer out of bounds at slot {slot}. Offset {offset} beyond limit {limit}. Did you bind the correct `Vertex` step-rate vertex buffer?")]
    VertexOutOfBounds {
        step_mode: VertexStepMode,
        offset: u64,
        limit: u64,
        slot: u32,
    },
    /// An instance index exceeds what the `Instance` step-rate buffer in
    /// `slot` can supply.
    #[error("Instance {last_instance} extends beyond limit {instance_limit} imposed by the buffer in slot {slot}. Did you bind the correct `Instance` step-rate vertex buffer?")]
    InstanceBeyondLimit {
        last_instance: u64,
        instance_limit: u64,
        slot: u32,
    },
    /// An index in an indexed draw lies past the end of the bound index buffer.
    #[error("Index {last_index} extends beyond limit {index_limit}. Did you bind the correct index buffer?")]
    IndexBeyondLimit { last_index: u64, index_limit: u64 },
    /// The index format baked into the pipeline disagrees with the format
    /// of the currently bound index buffer.
    #[error(
        "Pipeline index format ({pipeline:?}) and buffer index format ({buffer:?}) do not match"
    )]
    UnmatchedIndexFormats {
        pipeline: wgt::IndexFormat,
        buffer: wgt::IndexFormat,
    },
    /// A bound buffer is smaller than the pipeline's late-checked
    /// `min_binding_size` requires.
    #[error(transparent)]
    BindingSizeTooSmall(#[from] LateMinBufferBindingSizeMismatch),
}

/// Error encountered when encoding a render command.
/// This is the shared error set between render bundles and passes.
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum RenderCommandError {
    #[error("Bind group {0:?} is invalid")]
    InvalidBindGroup(id::BindGroupId),
    #[error("Render bundle {0:?} is invalid")]
    InvalidRenderBundle(id::RenderBundleId),
    #[error("Bind group index {index} is greater than the device's requested `max_bind_group` limit {max}")]
    BindGroupIndexOutOfRange { index: u32, max: u32 },
    #[error("Vertex buffer index {index} is greater than the device's requested `max_vertex_buffers` limit {max}")]
    VertexBufferIndexOutOfRange { index: u32, max: u32 },
    // Fields: (offset, name of the limit that was violated, limit value).
    #[error("Dynamic buffer offset {0} does not respect device's requested `{1}` limit {2}")]
    UnalignedBufferOffset(u64, &'static str, u32),
    #[error("Number of buffer offsets ({actual}) does not match the number of dynamic bindings ({expected})")]
    InvalidDynamicOffsetCount { actual: usize, expected: usize },
    #[error("Render pipeline {0:?} is invalid")]
    InvalidPipeline(id::RenderPipelineId),
    #[error("QuerySet {0:?} is invalid")]
    InvalidQuerySet(id::QuerySetId),
    #[error("Render pipeline targets are incompatible with render pass")]
    IncompatiblePipelineTargets(#[from] crate::device::RenderPassCompatibilityError),
    #[error("Pipeline writes to depth/stencil, while the pass has read-only depth/stencil")]
    IncompatiblePipelineRods,
    #[error(transparent)]
    UsageConflict(#[from] UsageConflict),
    #[error("Buffer {0:?} is destroyed")]
    DestroyedBuffer(id::BufferId),
    #[error(transparent)]
    MissingBufferUsage(#[from] MissingBufferUsageError),
    #[error(transparent)]
    MissingTextureUsage(#[from] MissingTextureUsageError),
    #[error(transparent)]
    PushConstants(#[from] PushConstantUploadError),
    #[error("Viewport has invalid rect {0:?}; origin and/or size is less than or equal to 0, and/or is not contained in the render target {1:?}")]
    InvalidViewportRect(Rect<f32>, wgt::Extent3d),
    #[error("Viewport minDepth {0} and/or maxDepth {1} are not in [0, 1]")]
    InvalidViewportDepth(f32, f32),
    #[error("Scissor {0:?} is not contained in the render target {1:?}")]
    InvalidScissorRect(Rect<u32>, wgt::Extent3d),
    #[error("Support for {0} is not implemented yet")]
    Unimplemented(&'static str),
}
impl crate::error::PrettyError for RenderCommandError {
    fn fmt_pretty(&self, fmt: &mut ErrorFormatter) {
        fmt.error(self);
        // For id-carrying variants, also emit the resource's label so the
        // user-facing message identifies which object was at fault.
        match *self {
            Self::InvalidBindGroup(id) => {
                fmt.bind_group_label(&id);
            }
            Self::InvalidPipeline(id) => {
                fmt.render_pipeline_label(&id);
            }
            Self::UsageConflict(UsageConflict::TextureInvalid { id }) => {
                fmt.texture_label(&id);
            }
            Self::UsageConflict(UsageConflict::BufferInvalid { id })
            | Self::DestroyedBuffer(id) => {
                fmt.buffer_label(&id);
            }
            _ => {}
        };
    }
}

/// An axis-aligned rectangle: origin `(x, y)` and extent `(w, h)`.
/// Used for viewports (`Rect<f32>`) and scissors (`Rect<u32>`).
#[derive(Clone, Copy, Debug, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Rect<T> {
    pub x: T,
    pub y: T,
    pub w: T,
    pub h: T,
}

/// One recorded render-pass / render-bundle command. Side data (dynamic
/// offsets, debug strings, push-constant words) lives in the owning
/// `BasePass`'s side tables.
#[doc(hidden)]
#[derive(Clone, Copy, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum RenderCommand {
    SetBindGroup {
        index: u32,
        /// How many entries this command consumes from
        /// `BasePass::dynamic_offsets`.
        num_dynamic_offsets: usize,
        bind_group_id: id::BindGroupId,
    },
    SetPipeline(id::RenderPipelineId),
    SetIndexBuffer {
        buffer_id: id::BufferId,
        index_format: wgt::IndexFormat,
        offset: BufferAddress,
        /// `None` means "to the end of the buffer".
        size: Option<BufferSize>,
    },
    SetVertexBuffer {
        slot: u32,
        buffer_id: id::BufferId,
        offset: BufferAddress,
        /// `None` means "to the end of the buffer".
        size: Option<BufferSize>,
    },
    SetBlendConstant(Color),
    SetStencilReference(u32),
    SetViewport {
        rect: Rect<f32>,
        //TODO: use half-float to reduce the size?
        depth_min: f32,
        depth_max: f32,
    },
    SetScissor(Rect<u32>),

    /// Set a range of push constants to values stored in [`BasePass::push_constant_data`].
    ///
    /// See [`wgpu::RenderPass::set_push_constants`] for a detailed explanation
    /// of the restrictions these commands must satisfy.
    SetPushConstant {
        /// Which stages we are setting push constant values for.
        stages: wgt::ShaderStages,

        /// The byte offset within the push constant storage to write to. This
        /// must be a multiple of four.
        offset: u32,

        /// The number of bytes to write. This must be a multiple of four.
        size_bytes: u32,

        /// Index in [`BasePass::push_constant_data`] of the start of the data
        /// to be written.
        ///
        /// Note: this is not a byte offset like `offset`. Rather, it is the
        /// index of the first `u32` element in `push_constant_data` to read.
        ///
        /// `None` means zeros should be written to the destination range, and
        /// there is no corresponding data in `push_constant_data`. This is used
        /// by render bundles, which explicitly clear out any state that
        /// post-bundle code might see.
        values_offset: Option<u32>,
    },
    Draw {
        vertex_count: u32,
        instance_count: u32,
        first_vertex: u32,
        first_instance: u32,
    },
    DrawIndexed {
        index_count: u32,
        instance_count: u32,
        first_index: u32,
        base_vertex: i32,
        first_instance: u32,
    },
    MultiDrawIndirect {
        buffer_id: id::BufferId,
        offset: BufferAddress,
        /// Count of `None` represents a non-multi call.
        count: Option<NonZeroU32>,
        indexed: bool,
    },
    MultiDrawIndirectCount {
        buffer_id: id::BufferId,
        offset: BufferAddress,
        count_buffer_id: id::BufferId,
        count_buffer_offset: BufferAddress,
        max_count: u32,
        indexed: bool,
    },
    PushDebugGroup {
        color: u32,
        /// Number of bytes to consume from `BasePass::string_data`.
        len: usize,
    },
    PopDebugGroup,
    InsertDebugMarker {
        color: u32,
        /// Number of bytes to consume from `BasePass::string_data`.
        len: usize,
    },
    WriteTimestamp {
        query_set_id: id::QuerySetId,
        query_index: u32,
    },
    BeginOcclusionQuery {
        query_index: u32,
    },
    EndOcclusionQuery,
    BeginPipelineStatisticsQuery {
        query_set_id: id::QuerySetId,
        query_index: u32,
    },
    EndPipelineStatisticsQuery,
    ExecuteBundle(id::RenderBundleId),
}
diff --git a/third_party/rust/wgpu-core/src/command/memory_init.rs b/third_party/rust/wgpu-core/src/command/memory_init.rs
new file mode 100644
index 0000000000..3bfc71f4f7
--- /dev/null
+++ b/third_party/rust/wgpu-core/src/command/memory_init.rs
@@ -0,0 +1,342 @@
use std::{collections::hash_map::Entry, ops::Range, sync::Arc, vec::Drain};

use hal::CommandEncoder;

use crate::{
    device::Device,
    hal_api::HalApi,
    init_tracker::*,
    resource::{Resource, Texture},
    track::{TextureTracker, Tracker},
    FastHashMap,
};

use super::{
    clear::clear_texture, BakedCommands, ClearError, DestroyedBufferError, DestroyedTextureError,
};

/// Surface that was discarded by `StoreOp::Discard` of a preceding renderpass.
/// Any read access to this surface needs to be preceded by a texture initialization.
#[derive(Clone)]
pub(crate) struct TextureSurfaceDiscard<A: HalApi> {
    pub texture: Arc<Texture<A>>,
    pub mip_level: u32,
    pub layer: u32,
}

pub(crate) type SurfacesInDiscardState<A> = Vec<TextureSurfaceDiscard<A>>;

pub(crate) struct CommandBufferTextureMemoryActions<A: HalApi> {
    /// The tracker actions that we need to be executed before the command
    /// buffer is executed.
+ init_actions: Vec<TextureInitTrackerAction<A>>, + /// All the discards that haven't been followed by init again within the + /// command buffer i.e. everything in this list resets the texture init + /// state *after* the command buffer execution + discards: Vec<TextureSurfaceDiscard<A>>, +} + +impl<A: HalApi> Default for CommandBufferTextureMemoryActions<A> { + fn default() -> Self { + Self { + init_actions: Default::default(), + discards: Default::default(), + } + } +} + +impl<A: HalApi> CommandBufferTextureMemoryActions<A> { + pub(crate) fn drain_init_actions(&mut self) -> Drain<TextureInitTrackerAction<A>> { + self.init_actions.drain(..) + } + + pub(crate) fn discard(&mut self, discard: TextureSurfaceDiscard<A>) { + self.discards.push(discard); + } + + // Registers a TextureInitTrackerAction. + // Returns previously discarded surface that need to be initialized *immediately* now. + // Only returns a non-empty list if action is MemoryInitKind::NeedsInitializedMemory. + #[must_use] + pub(crate) fn register_init_action( + &mut self, + action: &TextureInitTrackerAction<A>, + ) -> SurfacesInDiscardState<A> { + let mut immediately_necessary_clears = SurfacesInDiscardState::new(); + + // Note that within a command buffer we may stack arbitrary memory init + // actions on the same texture Since we react to them in sequence, they + // are going to be dropped again at queue submit + // + // We don't need to add MemoryInitKind::NeedsInitializedMemory to + // init_actions if a surface is part of the discard list. But that would + // mean splitting up the action which is more than we'd win here. + self.init_actions.extend( + action + .texture + .initialization_status + .read() + .check_action(action), + ); + + // We expect very few discarded surfaces at any point in time which is + // why a simple linear search is likely best. (i.e. most of the time + // self.discards is empty!) 
+ let init_actions = &mut self.init_actions; + self.discards.retain(|discarded_surface| { + if discarded_surface.texture.as_info().id() == action.texture.as_info().id() + && action.range.layer_range.contains(&discarded_surface.layer) + && action + .range + .mip_range + .contains(&discarded_surface.mip_level) + { + if let MemoryInitKind::NeedsInitializedMemory = action.kind { + immediately_necessary_clears.push(discarded_surface.clone()); + + // Mark surface as implicitly initialized (this is relevant + // because it might have been uninitialized prior to + // discarding + init_actions.push(TextureInitTrackerAction { + texture: discarded_surface.texture.clone(), + range: TextureInitRange { + mip_range: discarded_surface.mip_level + ..(discarded_surface.mip_level + 1), + layer_range: discarded_surface.layer..(discarded_surface.layer + 1), + }, + kind: MemoryInitKind::ImplicitlyInitialized, + }); + } + false + } else { + true + } + }); + + immediately_necessary_clears + } + + // Shortcut for register_init_action when it is known that the action is an + // implicit init, not requiring any immediate resource init. + pub(crate) fn register_implicit_init( + &mut self, + texture: &Arc<Texture<A>>, + range: TextureInitRange, + ) { + let must_be_empty = self.register_init_action(&TextureInitTrackerAction { + texture: texture.clone(), + range, + kind: MemoryInitKind::ImplicitlyInitialized, + }); + assert!(must_be_empty.is_empty()); + } +} + +// Utility function that takes discarded surfaces from (several calls to) +// register_init_action and initializes them on the spot. +// +// Takes care of barriers as well! 
pub(crate) fn fixup_discarded_surfaces<
    A: HalApi,
    InitIter: Iterator<Item = TextureSurfaceDiscard<A>>,
>(
    inits: InitIter,
    encoder: &mut A::CommandEncoder,
    texture_tracker: &mut TextureTracker<A>,
    device: &Device<A>,
) {
    // Each discarded surface is a single (mip, layer) cell; clear exactly
    // that cell using the device's zero buffer.
    for discard in inits {
        let range = TextureInitRange {
            mip_range: discard.mip_level..(discard.mip_level + 1),
            layer_range: discard.layer..(discard.layer + 1),
        };
        // NOTE(review): assumes clearing a just-discarded surface cannot
        // fail — TODO confirm `clear_texture`'s error conditions here.
        clear_texture(
            &discard.texture,
            range,
            encoder,
            texture_tracker,
            &device.alignments,
            device.zero_buffer.as_ref().unwrap(),
        )
        .unwrap();
    }
}

impl<A: HalApi> BakedCommands<A> {
    // inserts all buffer initializations that are going to be needed for
    // executing the commands and updates resource init states accordingly
    pub(crate) fn initialize_buffer_memory(
        &mut self,
        device_tracker: &mut Tracker<A>,
    ) -> Result<(), DestroyedBufferError> {
        // Gather init ranges for each buffer so we can collapse them.
        // It is not possible to do this at an earlier point since previously
        // executed command buffer change the resource init state.
        let mut uninitialized_ranges_per_buffer = FastHashMap::default();
        for buffer_use in self.buffer_memory_init_actions.drain(..)
{ + let mut initialization_status = buffer_use.buffer.initialization_status.write(); + + // align the end to 4 + let end_remainder = buffer_use.range.end % wgt::COPY_BUFFER_ALIGNMENT; + let end = if end_remainder == 0 { + buffer_use.range.end + } else { + buffer_use.range.end + wgt::COPY_BUFFER_ALIGNMENT - end_remainder + }; + let uninitialized_ranges = initialization_status.drain(buffer_use.range.start..end); + + match buffer_use.kind { + MemoryInitKind::ImplicitlyInitialized => {} + MemoryInitKind::NeedsInitializedMemory => { + match uninitialized_ranges_per_buffer.entry(buffer_use.buffer.as_info().id()) { + Entry::Vacant(e) => { + e.insert(( + buffer_use.buffer.clone(), + uninitialized_ranges.collect::<Vec<Range<wgt::BufferAddress>>>(), + )); + } + Entry::Occupied(mut e) => { + e.get_mut().1.extend(uninitialized_ranges); + } + } + } + } + } + + for (buffer_id, (buffer, mut ranges)) in uninitialized_ranges_per_buffer { + // Collapse touching ranges. + ranges.sort_by_key(|r| r.start); + for i in (1..ranges.len()).rev() { + // The memory init tracker made sure of this! + assert!(ranges[i - 1].end <= ranges[i].start); + if ranges[i].start == ranges[i - 1].end { + ranges[i - 1].end = ranges[i].end; + ranges.swap_remove(i); // Ordering not important at this point + } + } + + // Don't do use_replace since the buffer may already no longer have + // a ref_count. + // + // However, we *know* that it is currently in use, so the tracker + // must already know about it. 
+ let transition = device_tracker + .buffers + .set_single(&buffer, hal::BufferUses::COPY_DST) + .unwrap() + .1; + + let snatch_guard = buffer.device.snatchable_lock.read(); + let raw_buf = buffer + .raw + .get(&snatch_guard) + .ok_or(DestroyedBufferError(buffer_id))?; + + unsafe { + self.encoder.transition_buffers( + transition + .map(|pending| pending.into_hal(&buffer, &snatch_guard)) + .into_iter(), + ); + } + + for range in ranges.iter() { + assert!( + range.start % wgt::COPY_BUFFER_ALIGNMENT == 0, + "Buffer {:?} has an uninitialized range with a start \ + not aligned to 4 (start was {})", + raw_buf, + range.start + ); + assert!( + range.end % wgt::COPY_BUFFER_ALIGNMENT == 0, + "Buffer {:?} has an uninitialized range with an end \ + not aligned to 4 (end was {})", + raw_buf, + range.end + ); + + unsafe { + self.encoder.clear_buffer(raw_buf, range.clone()); + } + } + } + Ok(()) + } + + // inserts all texture initializations that are going to be needed for + // executing the commands and updates resource init states accordingly any + // textures that are left discarded by this command buffer will be marked as + // uninitialized + pub(crate) fn initialize_texture_memory( + &mut self, + device_tracker: &mut Tracker<A>, + device: &Device<A>, + ) -> Result<(), DestroyedTextureError> { + let mut ranges: Vec<TextureInitRange> = Vec::new(); + for texture_use in self.texture_memory_actions.drain_init_actions() { + let mut initialization_status = texture_use.texture.initialization_status.write(); + let use_range = texture_use.range; + let affected_mip_trackers = initialization_status + .mips + .iter_mut() + .enumerate() + .skip(use_range.mip_range.start as usize) + .take((use_range.mip_range.end - use_range.mip_range.start) as usize); + + match texture_use.kind { + MemoryInitKind::ImplicitlyInitialized => { + for (_, mip_tracker) in affected_mip_trackers { + mip_tracker.drain(use_range.layer_range.clone()); + } + } + MemoryInitKind::NeedsInitializedMemory => { + for 
(mip_level, mip_tracker) in affected_mip_trackers { + for layer_range in mip_tracker.drain(use_range.layer_range.clone()) { + ranges.push(TextureInitRange { + mip_range: (mip_level as u32)..(mip_level as u32 + 1), + layer_range, + }); + } + } + } + } + + // TODO: Could we attempt some range collapsing here? + for range in ranges.drain(..) { + let clear_result = clear_texture( + &texture_use.texture, + range, + &mut self.encoder, + &mut device_tracker.textures, + &device.alignments, + device.zero_buffer.as_ref().unwrap(), + ); + + // A Texture can be destroyed between the command recording + // and now, this is out of our control so we have to handle + // it gracefully. + if let Err(ClearError::InvalidTexture(id)) = clear_result { + return Err(DestroyedTextureError(id)); + } + + // Other errors are unexpected. + if let Err(error) = clear_result { + panic!("{error}"); + } + } + } + + // Now that all buffers/textures have the proper init state for before + // cmdbuf start, we discard init states for textures it left discarded + // after its execution. 
+ for surface_discard in self.texture_memory_actions.discards.iter() { + surface_discard + .texture + .initialization_status + .write() + .discard(surface_discard.mip_level, surface_discard.layer); + } + + Ok(()) + } +} diff --git a/third_party/rust/wgpu-core/src/command/mod.rs b/third_party/rust/wgpu-core/src/command/mod.rs new file mode 100644 index 0000000000..2d5fca200a --- /dev/null +++ b/third_party/rust/wgpu-core/src/command/mod.rs @@ -0,0 +1,732 @@ +mod bind; +mod bundle; +mod clear; +mod compute; +mod draw; +mod memory_init; +mod query; +mod render; +mod transfer; + +use std::slice; +use std::sync::Arc; + +pub(crate) use self::clear::clear_texture; +pub use self::{ + bundle::*, clear::ClearError, compute::*, draw::*, query::*, render::*, transfer::*, +}; + +use self::memory_init::CommandBufferTextureMemoryActions; + +use crate::device::{Device, DeviceError}; +use crate::error::{ErrorFormatter, PrettyError}; +use crate::hub::Hub; +use crate::id::CommandBufferId; +use crate::snatch::SnatchGuard; + +use crate::init_tracker::BufferInitTrackerAction; +use crate::resource::{Resource, ResourceInfo, ResourceType}; +use crate::track::{Tracker, UsageScope}; +use crate::{api_log, global::Global, hal_api::HalApi, id, resource_log, Label}; + +use hal::CommandEncoder as _; +use parking_lot::Mutex; +use thiserror::Error; + +#[cfg(feature = "trace")] +use crate::device::trace::Command as TraceCommand; + +const PUSH_CONSTANT_CLEAR_ARRAY: &[u32] = &[0_u32; 64]; + +#[derive(Debug)] +pub(crate) enum CommandEncoderStatus { + Recording, + Finished, + Error, +} + +pub(crate) struct CommandEncoder<A: HalApi> { + raw: A::CommandEncoder, + list: Vec<A::CommandBuffer>, + is_open: bool, + label: Option<String>, +} + +//TODO: handle errors better +impl<A: HalApi> CommandEncoder<A> { + /// Closes the live encoder + fn close_and_swap(&mut self) -> Result<(), DeviceError> { + if self.is_open { + self.is_open = false; + let new = unsafe { self.raw.end_encoding()? 
}; + self.list.insert(self.list.len() - 1, new); + } + + Ok(()) + } + + fn close(&mut self) -> Result<(), DeviceError> { + if self.is_open { + self.is_open = false; + let cmd_buf = unsafe { self.raw.end_encoding()? }; + self.list.push(cmd_buf); + } + + Ok(()) + } + + fn discard(&mut self) { + if self.is_open { + self.is_open = false; + unsafe { self.raw.discard_encoding() }; + } + } + + fn open(&mut self) -> Result<&mut A::CommandEncoder, DeviceError> { + if !self.is_open { + self.is_open = true; + let label = self.label.as_deref(); + unsafe { self.raw.begin_encoding(label)? }; + } + + Ok(&mut self.raw) + } + + fn open_pass(&mut self, label: Option<&str>) -> Result<(), DeviceError> { + self.is_open = true; + unsafe { self.raw.begin_encoding(label)? }; + + Ok(()) + } +} + +pub struct BakedCommands<A: HalApi> { + pub(crate) encoder: A::CommandEncoder, + pub(crate) list: Vec<A::CommandBuffer>, + pub(crate) trackers: Tracker<A>, + buffer_memory_init_actions: Vec<BufferInitTrackerAction<A>>, + texture_memory_actions: CommandBufferTextureMemoryActions<A>, +} + +pub(crate) struct DestroyedBufferError(pub id::BufferId); +pub(crate) struct DestroyedTextureError(pub id::TextureId); + +pub struct CommandBufferMutable<A: HalApi> { + encoder: CommandEncoder<A>, + status: CommandEncoderStatus, + pub(crate) trackers: Tracker<A>, + buffer_memory_init_actions: Vec<BufferInitTrackerAction<A>>, + texture_memory_actions: CommandBufferTextureMemoryActions<A>, + pub(crate) pending_query_resets: QueryResetMap<A>, + #[cfg(feature = "trace")] + pub(crate) commands: Option<Vec<TraceCommand>>, +} + +impl<A: HalApi> CommandBufferMutable<A> { + pub(crate) fn open_encoder_and_tracker( + &mut self, + ) -> Result<(&mut A::CommandEncoder, &mut Tracker<A>), DeviceError> { + let encoder = self.encoder.open()?; + let tracker = &mut self.trackers; + + Ok((encoder, tracker)) + } +} + +pub struct CommandBuffer<A: HalApi> { + pub(crate) device: Arc<Device<A>>, + limits: wgt::Limits, + 
support_clear_texture: bool, + pub(crate) info: ResourceInfo<CommandBuffer<A>>, + pub(crate) data: Mutex<Option<CommandBufferMutable<A>>>, +} + +impl<A: HalApi> Drop for CommandBuffer<A> { + fn drop(&mut self) { + if self.data.lock().is_none() { + return; + } + resource_log!("resource::CommandBuffer::drop {:?}", self.info.label()); + let mut baked = self.extract_baked_commands(); + unsafe { + baked.encoder.reset_all(baked.list.into_iter()); + } + unsafe { + use hal::Device; + self.device.raw().destroy_command_encoder(baked.encoder); + } + } +} + +impl<A: HalApi> CommandBuffer<A> { + pub(crate) fn new( + encoder: A::CommandEncoder, + device: &Arc<Device<A>>, + #[cfg(feature = "trace")] enable_tracing: bool, + label: Option<String>, + ) -> Self { + CommandBuffer { + device: device.clone(), + limits: device.limits.clone(), + support_clear_texture: device.features.contains(wgt::Features::CLEAR_TEXTURE), + info: ResourceInfo::new( + label + .as_ref() + .unwrap_or(&String::from("<CommandBuffer>")) + .as_str(), + ), + data: Mutex::new(Some(CommandBufferMutable { + encoder: CommandEncoder { + raw: encoder, + is_open: false, + list: Vec::new(), + label, + }, + status: CommandEncoderStatus::Recording, + trackers: Tracker::new(), + buffer_memory_init_actions: Default::default(), + texture_memory_actions: Default::default(), + pending_query_resets: QueryResetMap::new(), + #[cfg(feature = "trace")] + commands: if enable_tracing { + Some(Vec::new()) + } else { + None + }, + })), + } + } + + pub(crate) fn insert_barriers_from_tracker( + raw: &mut A::CommandEncoder, + base: &mut Tracker<A>, + head: &Tracker<A>, + snatch_guard: &SnatchGuard, + ) { + profiling::scope!("insert_barriers"); + + base.buffers.set_from_tracker(&head.buffers); + base.textures.set_from_tracker(&head.textures); + + Self::drain_barriers(raw, base, snatch_guard); + } + + pub(crate) fn insert_barriers_from_scope( + raw: &mut A::CommandEncoder, + base: &mut Tracker<A>, + head: &UsageScope<A>, + snatch_guard: 
&SnatchGuard, + ) { + profiling::scope!("insert_barriers"); + + base.buffers.set_from_usage_scope(&head.buffers); + base.textures.set_from_usage_scope(&head.textures); + + Self::drain_barriers(raw, base, snatch_guard); + } + + pub(crate) fn drain_barriers( + raw: &mut A::CommandEncoder, + base: &mut Tracker<A>, + snatch_guard: &SnatchGuard, + ) { + profiling::scope!("drain_barriers"); + + let buffer_barriers = base.buffers.drain_transitions(snatch_guard); + let (transitions, textures) = base.textures.drain_transitions(snatch_guard); + let texture_barriers = transitions + .into_iter() + .enumerate() + .map(|(i, p)| p.into_hal(textures[i].unwrap().raw().unwrap())); + + unsafe { + raw.transition_buffers(buffer_barriers); + raw.transition_textures(texture_barriers); + } + } +} + +impl<A: HalApi> CommandBuffer<A> { + fn get_encoder( + hub: &Hub<A>, + id: id::CommandEncoderId, + ) -> Result<Arc<Self>, CommandEncoderError> { + let storage = hub.command_buffers.read(); + match storage.get(id.transmute()) { + Ok(cmd_buf) => match cmd_buf.data.lock().as_ref().unwrap().status { + CommandEncoderStatus::Recording => Ok(cmd_buf.clone()), + CommandEncoderStatus::Finished => Err(CommandEncoderError::NotRecording), + CommandEncoderStatus::Error => Err(CommandEncoderError::Invalid), + }, + Err(_) => Err(CommandEncoderError::Invalid), + } + } + + pub fn is_finished(&self) -> bool { + match self.data.lock().as_ref().unwrap().status { + CommandEncoderStatus::Finished => true, + _ => false, + } + } + + pub(crate) fn extract_baked_commands(&mut self) -> BakedCommands<A> { + log::trace!( + "Extracting BakedCommands from CommandBuffer {:?}", + self.info.label() + ); + let data = self.data.lock().take().unwrap(); + BakedCommands { + encoder: data.encoder.raw, + list: data.encoder.list, + trackers: data.trackers, + buffer_memory_init_actions: data.buffer_memory_init_actions, + texture_memory_actions: data.texture_memory_actions, + } + } + + pub(crate) fn from_arc_into_baked(self: Arc<Self>) 
-> BakedCommands<A> { + if let Some(mut command_buffer) = Arc::into_inner(self) { + command_buffer.extract_baked_commands() + } else { + panic!("CommandBuffer cannot be destroyed because is still in use"); + } + } +} + +impl<A: HalApi> Resource for CommandBuffer<A> { + const TYPE: ResourceType = "CommandBuffer"; + + type Marker = crate::id::markers::CommandBuffer; + + fn as_info(&self) -> &ResourceInfo<Self> { + &self.info + } + + fn as_info_mut(&mut self) -> &mut ResourceInfo<Self> { + &mut self.info + } + + fn label(&self) -> String { + let str = match self.data.lock().as_ref().unwrap().encoder.label.as_ref() { + Some(label) => label.clone(), + _ => String::new(), + }; + str + } +} + +#[derive(Copy, Clone, Debug)] +pub struct BasePassRef<'a, C> { + pub label: Option<&'a str>, + pub commands: &'a [C], + pub dynamic_offsets: &'a [wgt::DynamicOffset], + pub string_data: &'a [u8], + pub push_constant_data: &'a [u32], +} + +/// A stream of commands for a render pass or compute pass. +/// +/// This also contains side tables referred to by certain commands, +/// like dynamic offsets for [`SetBindGroup`] or string data for +/// [`InsertDebugMarker`]. +/// +/// Render passes use `BasePass<RenderCommand>`, whereas compute +/// passes use `BasePass<ComputeCommand>`. +/// +/// [`SetBindGroup`]: RenderCommand::SetBindGroup +/// [`InsertDebugMarker`]: RenderCommand::InsertDebugMarker +#[doc(hidden)] +#[derive(Debug)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct BasePass<C> { + pub label: Option<String>, + + /// The stream of commands. + pub commands: Vec<C>, + + /// Dynamic offsets consumed by [`SetBindGroup`] commands in `commands`. + /// + /// Each successive `SetBindGroup` consumes the next + /// [`num_dynamic_offsets`] values from this list. + pub dynamic_offsets: Vec<wgt::DynamicOffset>, + + /// Strings used by debug instructions. 
+ /// + /// Each successive [`PushDebugGroup`] or [`InsertDebugMarker`] + /// instruction consumes the next `len` bytes from this vector. + pub string_data: Vec<u8>, + + /// Data used by `SetPushConstant` instructions. + /// + /// See the documentation for [`RenderCommand::SetPushConstant`] + /// and [`ComputeCommand::SetPushConstant`] for details. + pub push_constant_data: Vec<u32>, +} + +impl<C: Clone> BasePass<C> { + fn new(label: &Label) -> Self { + Self { + label: label.as_ref().map(|cow| cow.to_string()), + commands: Vec::new(), + dynamic_offsets: Vec::new(), + string_data: Vec::new(), + push_constant_data: Vec::new(), + } + } + + #[cfg(feature = "trace")] + fn from_ref(base: BasePassRef<C>) -> Self { + Self { + label: base.label.map(str::to_string), + commands: base.commands.to_vec(), + dynamic_offsets: base.dynamic_offsets.to_vec(), + string_data: base.string_data.to_vec(), + push_constant_data: base.push_constant_data.to_vec(), + } + } + + pub fn as_ref(&self) -> BasePassRef<C> { + BasePassRef { + label: self.label.as_deref(), + commands: &self.commands, + dynamic_offsets: &self.dynamic_offsets, + string_data: &self.string_data, + push_constant_data: &self.push_constant_data, + } + } +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum CommandEncoderError { + #[error("Command encoder is invalid")] + Invalid, + #[error("Command encoder must be active")] + NotRecording, + #[error(transparent)] + Device(#[from] DeviceError), +} + +impl Global { + pub fn command_encoder_finish<A: HalApi>( + &self, + encoder_id: id::CommandEncoderId, + _desc: &wgt::CommandBufferDescriptor<Label>, + ) -> (CommandBufferId, Option<CommandEncoderError>) { + profiling::scope!("CommandEncoder::finish"); + + let hub = A::hub(self); + + let error = match hub.command_buffers.get(encoder_id.transmute()) { + Ok(cmd_buf) => { + let mut cmd_buf_data = cmd_buf.data.lock(); + let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); + match cmd_buf_data.status { + 
CommandEncoderStatus::Recording => { + if let Err(e) = cmd_buf_data.encoder.close() { + Some(e.into()) + } else { + cmd_buf_data.status = CommandEncoderStatus::Finished; + //Note: if we want to stop tracking the swapchain texture view, + // this is the place to do it. + log::trace!("Command buffer {:?}", encoder_id); + None + } + } + CommandEncoderStatus::Finished => Some(CommandEncoderError::NotRecording), + CommandEncoderStatus::Error => { + cmd_buf_data.encoder.discard(); + Some(CommandEncoderError::Invalid) + } + } + } + Err(_) => Some(CommandEncoderError::Invalid), + }; + + (encoder_id.transmute(), error) + } + + pub fn command_encoder_push_debug_group<A: HalApi>( + &self, + encoder_id: id::CommandEncoderId, + label: &str, + ) -> Result<(), CommandEncoderError> { + profiling::scope!("CommandEncoder::push_debug_group"); + api_log!("CommandEncoder::push_debug_group {label}"); + + let hub = A::hub(self); + + let cmd_buf = CommandBuffer::get_encoder(hub, encoder_id)?; + let mut cmd_buf_data = cmd_buf.data.lock(); + let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf_data.commands { + list.push(TraceCommand::PushDebugGroup(label.to_string())); + } + + let cmd_buf_raw = cmd_buf_data.encoder.open()?; + if !self + .instance + .flags + .contains(wgt::InstanceFlags::DISCARD_HAL_LABELS) + { + unsafe { + cmd_buf_raw.begin_debug_marker(label); + } + } + Ok(()) + } + + pub fn command_encoder_insert_debug_marker<A: HalApi>( + &self, + encoder_id: id::CommandEncoderId, + label: &str, + ) -> Result<(), CommandEncoderError> { + profiling::scope!("CommandEncoder::insert_debug_marker"); + api_log!("CommandEncoder::insert_debug_marker {label}"); + + let hub = A::hub(self); + + let cmd_buf = CommandBuffer::get_encoder(hub, encoder_id)?; + let mut cmd_buf_data = cmd_buf.data.lock(); + let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf_data.commands { + 
list.push(TraceCommand::InsertDebugMarker(label.to_string())); + } + + if !self + .instance + .flags + .contains(wgt::InstanceFlags::DISCARD_HAL_LABELS) + { + let cmd_buf_raw = cmd_buf_data.encoder.open()?; + unsafe { + cmd_buf_raw.insert_debug_marker(label); + } + } + Ok(()) + } + + pub fn command_encoder_pop_debug_group<A: HalApi>( + &self, + encoder_id: id::CommandEncoderId, + ) -> Result<(), CommandEncoderError> { + profiling::scope!("CommandEncoder::pop_debug_marker"); + api_log!("CommandEncoder::pop_debug_group"); + + let hub = A::hub(self); + + let cmd_buf = CommandBuffer::get_encoder(hub, encoder_id)?; + let mut cmd_buf_data = cmd_buf.data.lock(); + let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf_data.commands { + list.push(TraceCommand::PopDebugGroup); + } + + let cmd_buf_raw = cmd_buf_data.encoder.open()?; + if !self + .instance + .flags + .contains(wgt::InstanceFlags::DISCARD_HAL_LABELS) + { + unsafe { + cmd_buf_raw.end_debug_marker(); + } + } + Ok(()) + } +} + +fn push_constant_clear<PushFn>(offset: u32, size_bytes: u32, mut push_fn: PushFn) +where + PushFn: FnMut(u32, &[u32]), +{ + let mut count_words = 0_u32; + let size_words = size_bytes / wgt::PUSH_CONSTANT_ALIGNMENT; + while count_words < size_words { + let count_bytes = count_words * wgt::PUSH_CONSTANT_ALIGNMENT; + let size_to_write_words = + (size_words - count_words).min(PUSH_CONSTANT_CLEAR_ARRAY.len() as u32); + + push_fn( + offset + count_bytes, + &PUSH_CONSTANT_CLEAR_ARRAY[0..size_to_write_words as usize], + ); + + count_words += size_to_write_words; + } +} + +#[derive(Debug, Copy, Clone)] +struct StateChange<T> { + last_state: Option<T>, +} + +impl<T: Copy + PartialEq> StateChange<T> { + fn new() -> Self { + Self { last_state: None } + } + fn set_and_check_redundant(&mut self, new_state: T) -> bool { + let already_set = self.last_state == Some(new_state); + self.last_state = Some(new_state); + already_set + } + fn 
reset(&mut self) { + self.last_state = None; + } +} + +impl<T: Copy + PartialEq> Default for StateChange<T> { + fn default() -> Self { + Self::new() + } +} + +#[derive(Debug)] +struct BindGroupStateChange { + last_states: [StateChange<id::BindGroupId>; hal::MAX_BIND_GROUPS], +} + +impl BindGroupStateChange { + fn new() -> Self { + Self { + last_states: [StateChange::new(); hal::MAX_BIND_GROUPS], + } + } + + unsafe fn set_and_check_redundant( + &mut self, + bind_group_id: id::BindGroupId, + index: u32, + dynamic_offsets: &mut Vec<u32>, + offsets: *const wgt::DynamicOffset, + offset_length: usize, + ) -> bool { + // For now never deduplicate bind groups with dynamic offsets. + if offset_length == 0 { + // If this get returns None, that means we're well over the limit, + // so let the call through to get a proper error + if let Some(current_bind_group) = self.last_states.get_mut(index as usize) { + // Bail out if we're binding the same bind group. + if current_bind_group.set_and_check_redundant(bind_group_id) { + return true; + } + } + } else { + // We intentionally remove the memory of this bind group if we have dynamic offsets, + // such that if you try to bind this bind group later with _no_ dynamic offsets it + // tries to bind it again and gives a proper validation error. 
+ if let Some(current_bind_group) = self.last_states.get_mut(index as usize) { + current_bind_group.reset(); + } + dynamic_offsets + .extend_from_slice(unsafe { slice::from_raw_parts(offsets, offset_length) }); + } + false + } + fn reset(&mut self) { + self.last_states = [StateChange::new(); hal::MAX_BIND_GROUPS]; + } +} + +impl Default for BindGroupStateChange { + fn default() -> Self { + Self::new() + } +} + +trait MapPassErr<T, O> { + fn map_pass_err(self, scope: PassErrorScope) -> Result<T, O>; +} + +#[derive(Clone, Copy, Debug, Error)] +pub enum PassErrorScope { + #[error("In a bundle parameter")] + Bundle, + #[error("In a pass parameter")] + Pass(id::CommandEncoderId), + #[error("In a set_bind_group command")] + SetBindGroup(id::BindGroupId), + #[error("In a set_pipeline command")] + SetPipelineRender(id::RenderPipelineId), + #[error("In a set_pipeline command")] + SetPipelineCompute(id::ComputePipelineId), + #[error("In a set_push_constant command")] + SetPushConstant, + #[error("In a set_vertex_buffer command")] + SetVertexBuffer(id::BufferId), + #[error("In a set_index_buffer command")] + SetIndexBuffer(id::BufferId), + #[error("In a set_viewport command")] + SetViewport, + #[error("In a set_scissor_rect command")] + SetScissorRect, + #[error("In a draw command, indexed:{indexed} indirect:{indirect}")] + Draw { + indexed: bool, + indirect: bool, + pipeline: Option<id::RenderPipelineId>, + }, + #[error("While resetting queries after the renderpass was run")] + QueryReset, + #[error("In a write_timestamp command")] + WriteTimestamp, + #[error("In a begin_occlusion_query command")] + BeginOcclusionQuery, + #[error("In an end_occlusion_query command")] + EndOcclusionQuery, + #[error("In a begin_pipeline_statistics_query command")] + BeginPipelineStatisticsQuery, + #[error("In an end_pipeline_statistics_query command")] + EndPipelineStatisticsQuery, + #[error("In an execute_bundle command")] + ExecuteBundle, + #[error("In a dispatch command, indirect:{indirect}")] 
+ Dispatch { + indirect: bool, + pipeline: Option<id::ComputePipelineId>, + }, + #[error("In a pop_debug_group command")] + PopDebugGroup, +} + +impl PrettyError for PassErrorScope { + fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { + // This error is not in the error chain, only notes are needed + match *self { + Self::Pass(id) => { + fmt.command_buffer_label(&id.transmute()); + } + Self::SetBindGroup(id) => { + fmt.bind_group_label(&id); + } + Self::SetPipelineRender(id) => { + fmt.render_pipeline_label(&id); + } + Self::SetPipelineCompute(id) => { + fmt.compute_pipeline_label(&id); + } + Self::SetVertexBuffer(id) => { + fmt.buffer_label(&id); + } + Self::SetIndexBuffer(id) => { + fmt.buffer_label(&id); + } + Self::Draw { + pipeline: Some(id), .. + } => { + fmt.render_pipeline_label(&id); + } + Self::Dispatch { + pipeline: Some(id), .. + } => { + fmt.compute_pipeline_label(&id); + } + _ => {} + } + } +} diff --git a/third_party/rust/wgpu-core/src/command/query.rs b/third_party/rust/wgpu-core/src/command/query.rs new file mode 100644 index 0000000000..39d7a9cc93 --- /dev/null +++ b/third_party/rust/wgpu-core/src/command/query.rs @@ -0,0 +1,501 @@ +use hal::CommandEncoder as _; + +#[cfg(feature = "trace")] +use crate::device::trace::Command as TraceCommand; +use crate::{ + command::{CommandBuffer, CommandEncoderError}, + device::DeviceError, + global::Global, + hal_api::HalApi, + id::{self, Id}, + init_tracker::MemoryInitKind, + resource::QuerySet, + storage::Storage, + Epoch, FastHashMap, Index, +}; +use std::{iter, marker::PhantomData}; +use thiserror::Error; +use wgt::BufferAddress; + +#[derive(Debug)] +pub(crate) struct QueryResetMap<A: HalApi> { + map: FastHashMap<Index, (Vec<bool>, Epoch)>, + _phantom: PhantomData<A>, +} +impl<A: HalApi> QueryResetMap<A> { + pub fn new() -> Self { + Self { + map: FastHashMap::default(), + _phantom: PhantomData, + } + } + + pub fn use_query_set( + &mut self, + id: id::QuerySetId, + query_set: &QuerySet<A>, + query: u32, + ) 
-> bool { + let (index, epoch, _) = id.unzip(); + let vec_pair = self + .map + .entry(index) + .or_insert_with(|| (vec![false; query_set.desc.count as usize], epoch)); + + std::mem::replace(&mut vec_pair.0[query as usize], true) + } + + pub fn reset_queries( + &mut self, + raw_encoder: &mut A::CommandEncoder, + query_set_storage: &Storage<QuerySet<A>>, + backend: wgt::Backend, + ) -> Result<(), id::QuerySetId> { + for (query_set_id, (state, epoch)) in self.map.drain() { + let id = Id::zip(query_set_id, epoch, backend); + let query_set = query_set_storage.get(id).map_err(|_| id)?; + + debug_assert_eq!(state.len(), query_set.desc.count as usize); + + // Need to find all "runs" of values which need resets. If the state vector is: + // [false, true, true, false, true], we want to reset [1..3, 4..5]. This minimizes + // the amount of resets needed. + let mut run_start: Option<u32> = None; + for (idx, value) in state.into_iter().chain(iter::once(false)).enumerate() { + match (run_start, value) { + // We're inside of a run, do nothing + (Some(..), true) => {} + // We've hit the end of a run, dispatch a reset + (Some(start), false) => { + run_start = None; + unsafe { raw_encoder.reset_queries(query_set.raw(), start..idx as u32) }; + } + // We're starting a run + (None, true) => { + run_start = Some(idx as u32); + } + // We're in a run of falses, do nothing. + (None, false) => {} + } + } + } + + Ok(()) + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum SimplifiedQueryType { + Occlusion, + Timestamp, + PipelineStatistics, +} +impl From<wgt::QueryType> for SimplifiedQueryType { + fn from(q: wgt::QueryType) -> Self { + match q { + wgt::QueryType::Occlusion => SimplifiedQueryType::Occlusion, + wgt::QueryType::Timestamp => SimplifiedQueryType::Timestamp, + wgt::QueryType::PipelineStatistics(..) 
=> SimplifiedQueryType::PipelineStatistics, + } + } +} + +/// Error encountered when dealing with queries +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum QueryError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error(transparent)] + Encoder(#[from] CommandEncoderError), + #[error("Error encountered while trying to use queries")] + Use(#[from] QueryUseError), + #[error("Error encountered while trying to resolve a query")] + Resolve(#[from] ResolveError), + #[error("Buffer {0:?} is invalid or destroyed")] + InvalidBuffer(id::BufferId), + #[error("QuerySet {0:?} is invalid or destroyed")] + InvalidQuerySet(id::QuerySetId), +} + +impl crate::error::PrettyError for QueryError { + fn fmt_pretty(&self, fmt: &mut crate::error::ErrorFormatter) { + fmt.error(self); + match *self { + Self::InvalidBuffer(id) => fmt.buffer_label(&id), + Self::InvalidQuerySet(id) => fmt.query_set_label(&id), + + _ => {} + } + } +} + +/// Error encountered while trying to use queries +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum QueryUseError { + #[error("Query {query_index} is out of bounds for a query set of size {query_set_size}")] + OutOfBounds { + query_index: u32, + query_set_size: u32, + }, + #[error("Query {query_index} has already been used within the same renderpass. Queries must only be used once per renderpass")] + UsedTwiceInsideRenderpass { query_index: u32 }, + #[error("Query {new_query_index} was started while query {active_query_index} was already active. No more than one statistic or occlusion query may be active at once")] + AlreadyStarted { + active_query_index: u32, + new_query_index: u32, + }, + #[error("Query was stopped while there was no active query")] + AlreadyStopped, + #[error("A query of type {query_type:?} was started using a query set of type {set_type:?}")] + IncompatibleType { + set_type: SimplifiedQueryType, + query_type: SimplifiedQueryType, + }, +} + +/// Error encountered while trying to resolve a query. 
+#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum ResolveError { + #[error("Queries can only be resolved to buffers that contain the QUERY_RESOLVE usage")] + MissingBufferUsage, + #[error("Resolve buffer offset has to be aligned to `QUERY_RESOLVE_BUFFER_ALIGNMENT`")] + BufferOffsetAlignment, + #[error("Resolving queries {start_query}..{end_query} would overrun the query set of size {query_set_size}")] + QueryOverrun { + start_query: u32, + end_query: u32, + query_set_size: u32, + }, + #[error("Resolving queries {start_query}..{end_query} ({stride} byte queries) will end up overrunning the bounds of the destination buffer of size {buffer_size} using offsets {buffer_start_offset}..{buffer_end_offset}")] + BufferOverrun { + start_query: u32, + end_query: u32, + stride: u32, + buffer_size: BufferAddress, + buffer_start_offset: BufferAddress, + buffer_end_offset: BufferAddress, + }, +} + +impl<A: HalApi> QuerySet<A> { + fn validate_query( + &self, + query_set_id: id::QuerySetId, + query_type: SimplifiedQueryType, + query_index: u32, + reset_state: Option<&mut QueryResetMap<A>>, + ) -> Result<&A::QuerySet, QueryUseError> { + // We need to defer our resets because we are in a renderpass, + // add the usage to the reset map. 
+ if let Some(reset) = reset_state { + let used = reset.use_query_set(query_set_id, self, query_index); + if used { + return Err(QueryUseError::UsedTwiceInsideRenderpass { query_index }); + } + } + + let simple_set_type = SimplifiedQueryType::from(self.desc.ty); + if simple_set_type != query_type { + return Err(QueryUseError::IncompatibleType { + query_type, + set_type: simple_set_type, + }); + } + + if query_index >= self.desc.count { + return Err(QueryUseError::OutOfBounds { + query_index, + query_set_size: self.desc.count, + }); + } + + Ok(self.raw()) + } + + pub(super) fn validate_and_write_timestamp( + &self, + raw_encoder: &mut A::CommandEncoder, + query_set_id: id::QuerySetId, + query_index: u32, + reset_state: Option<&mut QueryResetMap<A>>, + ) -> Result<(), QueryUseError> { + let needs_reset = reset_state.is_none(); + let query_set = self.validate_query( + query_set_id, + SimplifiedQueryType::Timestamp, + query_index, + reset_state, + )?; + + unsafe { + // If we don't have a reset state tracker which can defer resets, we must reset now. + if needs_reset { + raw_encoder.reset_queries(self.raw(), query_index..(query_index + 1)); + } + raw_encoder.write_timestamp(query_set, query_index); + } + + Ok(()) + } + + pub(super) fn validate_and_begin_occlusion_query( + &self, + raw_encoder: &mut A::CommandEncoder, + query_set_id: id::QuerySetId, + query_index: u32, + reset_state: Option<&mut QueryResetMap<A>>, + active_query: &mut Option<(id::QuerySetId, u32)>, + ) -> Result<(), QueryUseError> { + let needs_reset = reset_state.is_none(); + let query_set = self.validate_query( + query_set_id, + SimplifiedQueryType::Occlusion, + query_index, + reset_state, + )?; + + if let Some((_old_id, old_idx)) = active_query.replace((query_set_id, query_index)) { + return Err(QueryUseError::AlreadyStarted { + active_query_index: old_idx, + new_query_index: query_index, + }); + } + + unsafe { + // If we don't have a reset state tracker which can defer resets, we must reset now. 
+ if needs_reset { + raw_encoder + .reset_queries(self.raw.as_ref().unwrap(), query_index..(query_index + 1)); + } + raw_encoder.begin_query(query_set, query_index); + } + + Ok(()) + } + + pub(super) fn validate_and_begin_pipeline_statistics_query( + &self, + raw_encoder: &mut A::CommandEncoder, + query_set_id: id::QuerySetId, + query_index: u32, + reset_state: Option<&mut QueryResetMap<A>>, + active_query: &mut Option<(id::QuerySetId, u32)>, + ) -> Result<(), QueryUseError> { + let needs_reset = reset_state.is_none(); + let query_set = self.validate_query( + query_set_id, + SimplifiedQueryType::PipelineStatistics, + query_index, + reset_state, + )?; + + if let Some((_old_id, old_idx)) = active_query.replace((query_set_id, query_index)) { + return Err(QueryUseError::AlreadyStarted { + active_query_index: old_idx, + new_query_index: query_index, + }); + } + + unsafe { + // If we don't have a reset state tracker which can defer resets, we must reset now. + if needs_reset { + raw_encoder.reset_queries(self.raw(), query_index..(query_index + 1)); + } + raw_encoder.begin_query(query_set, query_index); + } + + Ok(()) + } +} + +pub(super) fn end_occlusion_query<A: HalApi>( + raw_encoder: &mut A::CommandEncoder, + storage: &Storage<QuerySet<A>>, + active_query: &mut Option<(id::QuerySetId, u32)>, +) -> Result<(), QueryUseError> { + if let Some((query_set_id, query_index)) = active_query.take() { + // We can unwrap here as the validity was validated when the active query was set + let query_set = storage.get(query_set_id).unwrap(); + + unsafe { raw_encoder.end_query(query_set.raw.as_ref().unwrap(), query_index) }; + + Ok(()) + } else { + Err(QueryUseError::AlreadyStopped) + } +} + +pub(super) fn end_pipeline_statistics_query<A: HalApi>( + raw_encoder: &mut A::CommandEncoder, + storage: &Storage<QuerySet<A>>, + active_query: &mut Option<(id::QuerySetId, u32)>, +) -> Result<(), QueryUseError> { + if let Some((query_set_id, query_index)) = active_query.take() { + // We can 
unwrap here as the validity was validated when the active query was set + let query_set = storage.get(query_set_id).unwrap(); + + unsafe { raw_encoder.end_query(query_set.raw(), query_index) }; + + Ok(()) + } else { + Err(QueryUseError::AlreadyStopped) + } +} + +impl Global { + pub fn command_encoder_write_timestamp<A: HalApi>( + &self, + command_encoder_id: id::CommandEncoderId, + query_set_id: id::QuerySetId, + query_index: u32, + ) -> Result<(), QueryError> { + let hub = A::hub(self); + + let cmd_buf = CommandBuffer::get_encoder(hub, command_encoder_id)?; + let mut cmd_buf_data = cmd_buf.data.lock(); + let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf_data.commands { + list.push(TraceCommand::WriteTimestamp { + query_set_id, + query_index, + }); + } + + let encoder = &mut cmd_buf_data.encoder; + let tracker = &mut cmd_buf_data.trackers; + + let raw_encoder = encoder.open()?; + + let query_set_guard = hub.query_sets.read(); + let query_set = tracker + .query_sets + .add_single(&*query_set_guard, query_set_id) + .ok_or(QueryError::InvalidQuerySet(query_set_id))?; + + query_set.validate_and_write_timestamp(raw_encoder, query_set_id, query_index, None)?; + + Ok(()) + } + + pub fn command_encoder_resolve_query_set<A: HalApi>( + &self, + command_encoder_id: id::CommandEncoderId, + query_set_id: id::QuerySetId, + start_query: u32, + query_count: u32, + destination: id::BufferId, + destination_offset: BufferAddress, + ) -> Result<(), QueryError> { + let hub = A::hub(self); + + let cmd_buf = CommandBuffer::get_encoder(hub, command_encoder_id)?; + let mut cmd_buf_data = cmd_buf.data.lock(); + let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf_data.commands { + list.push(TraceCommand::ResolveQuerySet { + query_set_id, + start_query, + query_count, + destination, + destination_offset, + }); + } + + let encoder = &mut cmd_buf_data.encoder; 
+ let tracker = &mut cmd_buf_data.trackers; + let buffer_memory_init_actions = &mut cmd_buf_data.buffer_memory_init_actions; + let raw_encoder = encoder.open()?; + + if destination_offset % wgt::QUERY_RESOLVE_BUFFER_ALIGNMENT != 0 { + return Err(QueryError::Resolve(ResolveError::BufferOffsetAlignment)); + } + let query_set_guard = hub.query_sets.read(); + let query_set = tracker + .query_sets + .add_single(&*query_set_guard, query_set_id) + .ok_or(QueryError::InvalidQuerySet(query_set_id))?; + + let (dst_buffer, dst_pending) = { + let buffer_guard = hub.buffers.read(); + let dst_buffer = buffer_guard + .get(destination) + .map_err(|_| QueryError::InvalidBuffer(destination))?; + tracker + .buffers + .set_single(dst_buffer, hal::BufferUses::COPY_DST) + .ok_or(QueryError::InvalidBuffer(destination))? + }; + + let snatch_guard = dst_buffer.device.snatchable_lock.read(); + + let dst_barrier = dst_pending.map(|pending| pending.into_hal(&dst_buffer, &snatch_guard)); + + if !dst_buffer.usage.contains(wgt::BufferUsages::QUERY_RESOLVE) { + return Err(ResolveError::MissingBufferUsage.into()); + } + + let end_query = start_query + query_count; + if end_query > query_set.desc.count { + return Err(ResolveError::QueryOverrun { + start_query, + end_query, + query_set_size: query_set.desc.count, + } + .into()); + } + + let elements_per_query = match query_set.desc.ty { + wgt::QueryType::Occlusion => 1, + wgt::QueryType::PipelineStatistics(ps) => ps.bits().count_ones(), + wgt::QueryType::Timestamp => 1, + }; + let stride = elements_per_query * wgt::QUERY_SIZE; + let bytes_used = (stride * query_count) as BufferAddress; + + let buffer_start_offset = destination_offset; + let buffer_end_offset = buffer_start_offset + bytes_used; + + if buffer_end_offset > dst_buffer.size { + return Err(ResolveError::BufferOverrun { + start_query, + end_query, + stride, + buffer_size: dst_buffer.size, + buffer_start_offset, + buffer_end_offset, + } + .into()); + } + + // 
TODO(https://github.com/gfx-rs/wgpu/issues/3993): Need to track initialization state. + buffer_memory_init_actions.extend(dst_buffer.initialization_status.read().create_action( + &dst_buffer, + buffer_start_offset..buffer_end_offset, + MemoryInitKind::ImplicitlyInitialized, + )); + + let raw_dst_buffer = dst_buffer + .raw(&snatch_guard) + .ok_or(QueryError::InvalidBuffer(destination))?; + + unsafe { + raw_encoder.transition_buffers(dst_barrier.into_iter()); + raw_encoder.copy_query_results( + query_set.raw(), + start_query..end_query, + raw_dst_buffer, + destination_offset, + wgt::BufferSize::new_unchecked(stride as u64), + ); + } + + Ok(()) + } +} diff --git a/third_party/rust/wgpu-core/src/command/render.rs b/third_party/rust/wgpu-core/src/command/render.rs new file mode 100644 index 0000000000..d3de3e26e1 --- /dev/null +++ b/third_party/rust/wgpu-core/src/command/render.rs @@ -0,0 +1,2866 @@ +use crate::resource::Resource; +use crate::snatch::SnatchGuard; +use crate::{ + api_log, + binding_model::BindError, + command::{ + self, + bind::Binder, + end_occlusion_query, end_pipeline_statistics_query, + memory_init::{fixup_discarded_surfaces, SurfacesInDiscardState}, + BasePass, BasePassRef, BindGroupStateChange, CommandBuffer, CommandEncoderError, + CommandEncoderStatus, DrawError, ExecutionError, MapPassErr, PassErrorScope, QueryUseError, + RenderCommand, RenderCommandError, StateChange, + }, + device::{ + AttachmentData, Device, DeviceError, MissingDownlevelFlags, MissingFeatures, + RenderPassCompatibilityCheckType, RenderPassCompatibilityError, RenderPassContext, + }, + error::{ErrorFormatter, PrettyError}, + global::Global, + hal_api::HalApi, + hal_label, id, + init_tracker::{MemoryInitKind, TextureInitRange, TextureInitTrackerAction}, + pipeline::{self, PipelineFlags}, + resource::{Buffer, QuerySet, Texture, TextureView, TextureViewNotRenderableReason}, + storage::Storage, + track::{TextureSelector, Tracker, UsageConflict, UsageScope}, + validation::{ + 
check_buffer_usage, check_texture_usage, MissingBufferUsageError, MissingTextureUsageError, + }, + Label, +}; + +use arrayvec::ArrayVec; +use hal::CommandEncoder as _; +use thiserror::Error; +use wgt::{ + BufferAddress, BufferSize, BufferUsages, Color, IndexFormat, TextureUsages, + TextureViewDimension, VertexStepMode, +}; + +#[cfg(feature = "serde")] +use serde::Deserialize; +#[cfg(feature = "serde")] +use serde::Serialize; + +use std::sync::Arc; +use std::{borrow::Cow, fmt, iter, marker::PhantomData, mem, num::NonZeroU32, ops::Range, str}; + +use super::{ + memory_init::TextureSurfaceDiscard, CommandBufferTextureMemoryActions, CommandEncoder, + QueryResetMap, +}; + +/// Operation to perform to the output attachment at the start of a renderpass. +#[repr(C)] +#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(rename_all = "kebab-case"))] +pub enum LoadOp { + /// Clear the output attachment with the clear color. Clearing is faster than loading. + Clear = 0, + /// Do not clear output attachment. + Load = 1, +} + +/// Operation to perform to the output attachment at the end of a renderpass. +#[repr(C)] +#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(rename_all = "kebab-case"))] +pub enum StoreOp { + /// Discards the content of the render target. + /// + /// If you don't care about the contents of the target, this can be faster. + Discard = 0, + /// Store the result of the renderpass. + Store = 1, +} + +/// Describes an individual channel within a render pass, such as color, depth, or stencil. +#[repr(C)] +#[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct PassChannel<V> { + /// Operation to perform to the output attachment at the start of a + /// renderpass. 
+ /// + /// This must be clear if it is the first renderpass rendering to a swap + /// chain image. + pub load_op: LoadOp, + /// Operation to perform to the output attachment at the end of a renderpass. + pub store_op: StoreOp, + /// If load_op is [`LoadOp::Clear`], the attachment will be cleared to this + /// color. + pub clear_value: V, + /// If true, the relevant channel is not changed by a renderpass, and the + /// corresponding attachment can be used inside the pass by other read-only + /// usages. + pub read_only: bool, +} + +impl<V> PassChannel<V> { + fn hal_ops(&self) -> hal::AttachmentOps { + let mut ops = hal::AttachmentOps::empty(); + match self.load_op { + LoadOp::Load => ops |= hal::AttachmentOps::LOAD, + LoadOp::Clear => (), + }; + match self.store_op { + StoreOp::Store => ops |= hal::AttachmentOps::STORE, + StoreOp::Discard => (), + }; + ops + } +} + +/// Describes a color attachment to a render pass. +#[repr(C)] +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct RenderPassColorAttachment { + /// The view to use as an attachment. + pub view: id::TextureViewId, + /// The view that will receive the resolved output if multisampling is used. + pub resolve_target: Option<id::TextureViewId>, + /// What operations will be performed on this color attachment. + pub channel: PassChannel<Color>, +} + +/// Describes a depth/stencil attachment to a render pass. +#[repr(C)] +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct RenderPassDepthStencilAttachment { + /// The view to use as an attachment. + pub view: id::TextureViewId, + /// What operations will be performed on the depth part of the attachment. + pub depth: PassChannel<f32>, + /// What operations will be performed on the stencil part of the attachment. 
+ pub stencil: PassChannel<u32>, +} + +impl RenderPassDepthStencilAttachment { + /// Validate the given aspects' read-only flags against their load + /// and store ops. + /// + /// When an aspect is read-only, its load and store ops must be + /// `LoadOp::Load` and `StoreOp::Store`. + /// + /// On success, return a pair `(depth, stencil)` indicating + /// whether the depth and stencil passes are read-only. + fn depth_stencil_read_only( + &self, + aspects: hal::FormatAspects, + ) -> Result<(bool, bool), RenderPassErrorInner> { + let mut depth_read_only = true; + let mut stencil_read_only = true; + + if aspects.contains(hal::FormatAspects::DEPTH) { + if self.depth.read_only + && (self.depth.load_op, self.depth.store_op) != (LoadOp::Load, StoreOp::Store) + { + return Err(RenderPassErrorInner::InvalidDepthOps); + } + depth_read_only = self.depth.read_only; + } + + if aspects.contains(hal::FormatAspects::STENCIL) { + if self.stencil.read_only + && (self.stencil.load_op, self.stencil.store_op) != (LoadOp::Load, StoreOp::Store) + { + return Err(RenderPassErrorInner::InvalidStencilOps); + } + stencil_read_only = self.stencil.read_only; + } + + Ok((depth_read_only, stencil_read_only)) + } +} + +/// Location to write a timestamp to (beginning or end of the pass). +#[repr(C)] +#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(rename_all = "kebab-case"))] +pub enum RenderPassTimestampLocation { + Beginning = 0, + End = 1, +} + +/// Describes the writing of timestamp values in a render pass. +#[repr(C)] +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct RenderPassTimestampWrites { + /// The query set to write the timestamp to. + pub query_set: id::QuerySetId, + /// The index of the query set at which a start timestamp of this pass is written, if any. 
+ pub beginning_of_pass_write_index: Option<u32>, + /// The index of the query set at which an end timestamp of this pass is written, if any. + pub end_of_pass_write_index: Option<u32>, +} + +/// Describes the attachments of a render pass. +#[derive(Clone, Debug, Default, PartialEq)] +pub struct RenderPassDescriptor<'a> { + pub label: Label<'a>, + /// The color attachments of the render pass. + pub color_attachments: Cow<'a, [Option<RenderPassColorAttachment>]>, + /// The depth and stencil attachment of the render pass, if any. + pub depth_stencil_attachment: Option<&'a RenderPassDepthStencilAttachment>, + /// Defines where and when timestamp values will be written for this pass. + pub timestamp_writes: Option<&'a RenderPassTimestampWrites>, + /// Defines where the occlusion query results will be stored for this pass. + pub occlusion_query_set: Option<id::QuerySetId>, +} + +#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))] +pub struct RenderPass { + base: BasePass<RenderCommand>, + parent_id: id::CommandEncoderId, + color_targets: ArrayVec<Option<RenderPassColorAttachment>, { hal::MAX_COLOR_ATTACHMENTS }>, + depth_stencil_target: Option<RenderPassDepthStencilAttachment>, + timestamp_writes: Option<RenderPassTimestampWrites>, + occlusion_query_set_id: Option<id::QuerySetId>, + + // Resource binding dedupe state. 
+ #[cfg_attr(feature = "serde", serde(skip))] + current_bind_groups: BindGroupStateChange, + #[cfg_attr(feature = "serde", serde(skip))] + current_pipeline: StateChange<id::RenderPipelineId>, +} + +impl RenderPass { + pub fn new(parent_id: id::CommandEncoderId, desc: &RenderPassDescriptor) -> Self { + Self { + base: BasePass::new(&desc.label), + parent_id, + color_targets: desc.color_attachments.iter().cloned().collect(), + depth_stencil_target: desc.depth_stencil_attachment.cloned(), + timestamp_writes: desc.timestamp_writes.cloned(), + occlusion_query_set_id: desc.occlusion_query_set, + + current_bind_groups: BindGroupStateChange::new(), + current_pipeline: StateChange::new(), + } + } + + pub fn parent_id(&self) -> id::CommandEncoderId { + self.parent_id + } + + #[cfg(feature = "trace")] + pub fn into_command(self) -> crate::device::trace::Command { + crate::device::trace::Command::RunRenderPass { + base: self.base, + target_colors: self.color_targets.into_iter().collect(), + target_depth_stencil: self.depth_stencil_target, + timestamp_writes: self.timestamp_writes, + occlusion_query_set_id: self.occlusion_query_set_id, + } + } + + pub fn set_index_buffer( + &mut self, + buffer_id: id::BufferId, + index_format: IndexFormat, + offset: BufferAddress, + size: Option<BufferSize>, + ) { + self.base.commands.push(RenderCommand::SetIndexBuffer { + buffer_id, + index_format, + offset, + size, + }); + } +} + +impl fmt::Debug for RenderPass { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("RenderPass") + .field("encoder_id", &self.parent_id) + .field("color_targets", &self.color_targets) + .field("depth_stencil_target", &self.depth_stencil_target) + .field("command count", &self.base.commands.len()) + .field("dynamic offset count", &self.base.dynamic_offsets.len()) + .field( + "push constant u32 count", + &self.base.push_constant_data.len(), + ) + .finish() + } +} + +#[derive(Debug, PartialEq)] +enum OptionalState { + Unused, + Required, + 
Set, +} + +impl OptionalState { + fn require(&mut self, require: bool) { + if require && *self == Self::Unused { + *self = Self::Required; + } + } +} + +#[derive(Debug, Default)] +struct IndexState { + bound_buffer_view: Option<(id::BufferId, Range<BufferAddress>)>, + format: Option<IndexFormat>, + pipeline_format: Option<IndexFormat>, + limit: u64, +} + +impl IndexState { + fn update_limit(&mut self) { + self.limit = match self.bound_buffer_view { + Some((_, ref range)) => { + let format = self + .format + .expect("IndexState::update_limit must be called after a index buffer is set"); + let shift = match format { + IndexFormat::Uint16 => 1, + IndexFormat::Uint32 => 2, + }; + + (range.end - range.start) >> shift + } + None => 0, + } + } + + fn reset(&mut self) { + self.bound_buffer_view = None; + self.limit = 0; + } +} + +#[derive(Clone, Copy, Debug)] +struct VertexBufferState { + total_size: BufferAddress, + step: pipeline::VertexStep, + bound: bool, +} + +impl VertexBufferState { + const EMPTY: Self = Self { + total_size: 0, + step: pipeline::VertexStep { + stride: 0, + last_stride: 0, + mode: VertexStepMode::Vertex, + }, + bound: false, + }; +} + +#[derive(Debug, Default)] +struct VertexState { + inputs: ArrayVec<VertexBufferState, { hal::MAX_VERTEX_BUFFERS }>, + /// Length of the shortest vertex rate vertex buffer + vertex_limit: u64, + /// Buffer slot which the shortest vertex rate vertex buffer is bound to + vertex_limit_slot: u32, + /// Length of the shortest instance rate vertex buffer + instance_limit: u64, + /// Buffer slot which the shortest instance rate vertex buffer is bound to + instance_limit_slot: u32, + /// Total amount of buffers required by the pipeline. 
+ buffers_required: u32, +} + +impl VertexState { + fn update_limits(&mut self) { + // Implements the validation from https://gpuweb.github.io/gpuweb/#dom-gpurendercommandsmixin-draw + // Except that the formula is shuffled to extract the number of vertices in order + // to carry the bulk of the computation when changing states instead of when producing + // draws. Draw calls tend to happen at a higher frequency. Here we determine vertex + // limits that can be cheaply checked for each draw call. + self.vertex_limit = u32::MAX as u64; + self.instance_limit = u32::MAX as u64; + for (idx, vbs) in self.inputs.iter().enumerate() { + if !vbs.bound { + continue; + } + + let limit = if vbs.total_size < vbs.step.last_stride { + // The buffer cannot fit the last vertex. + 0 + } else { + if vbs.step.stride == 0 { + // We already checked that the last stride fits, the same + // vertex will be repeated so this slot can accommodate any number of + // vertices. + continue; + } + + // The general case. 
+ (vbs.total_size - vbs.step.last_stride) / vbs.step.stride + 1 + }; + + match vbs.step.mode { + VertexStepMode::Vertex => { + if limit < self.vertex_limit { + self.vertex_limit = limit; + self.vertex_limit_slot = idx as _; + } + } + VertexStepMode::Instance => { + if limit < self.instance_limit { + self.instance_limit = limit; + self.instance_limit_slot = idx as _; + } + } + } + } + } + + fn reset(&mut self) { + self.inputs.clear(); + self.vertex_limit = 0; + self.instance_limit = 0; + } +} + +#[derive(Debug)] +struct State<A: HalApi> { + pipeline_flags: PipelineFlags, + binder: Binder<A>, + blend_constant: OptionalState, + stencil_reference: u32, + pipeline: Option<id::RenderPipelineId>, + index: IndexState, + vertex: VertexState, + debug_scope_depth: u32, +} + +impl<A: HalApi> State<A> { + fn is_ready(&self, indexed: bool) -> Result<(), DrawError> { + // Determine how many vertex buffers have already been bound + let vertex_buffer_count = self.vertex.inputs.iter().take_while(|v| v.bound).count() as u32; + // Compare with the needed quantity + if vertex_buffer_count < self.vertex.buffers_required { + return Err(DrawError::MissingVertexBuffer { + index: vertex_buffer_count, + }); + } + + let bind_mask = self.binder.invalid_mask(); + if bind_mask != 0 { + //let (expected, provided) = self.binder.entries[index as usize].info(); + return Err(DrawError::IncompatibleBindGroup { + index: bind_mask.trailing_zeros(), + diff: self.binder.bgl_diff(), + }); + } + if self.pipeline.is_none() { + return Err(DrawError::MissingPipeline); + } + if self.blend_constant == OptionalState::Required { + return Err(DrawError::MissingBlendConstant); + } + + if indexed { + // Pipeline expects an index buffer + if let Some(pipeline_index_format) = self.index.pipeline_format { + // We have a buffer bound + let buffer_index_format = self.index.format.ok_or(DrawError::MissingIndexBuffer)?; + + // The buffers are different formats + if pipeline_index_format != buffer_index_format { + return 
Err(DrawError::UnmatchedIndexFormats { + pipeline: pipeline_index_format, + buffer: buffer_index_format, + }); + } + } + } + + self.binder.check_late_buffer_bindings()?; + + Ok(()) + } + + /// Reset the `RenderBundle`-related states. + fn reset_bundle(&mut self) { + self.binder.reset(); + self.pipeline = None; + self.index.reset(); + self.vertex.reset(); + } +} + +/// Describes an attachment location in words. +/// +/// Can be used as "the {loc} has..." or "{loc} has..." +#[derive(Debug, Copy, Clone)] +pub enum AttachmentErrorLocation { + Color { index: usize, resolve: bool }, + Depth, +} + +impl fmt::Display for AttachmentErrorLocation { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + AttachmentErrorLocation::Color { + index, + resolve: false, + } => write!(f, "color attachment at index {index}'s texture view"), + AttachmentErrorLocation::Color { + index, + resolve: true, + } => write!( + f, + "color attachment at index {index}'s resolve texture view" + ), + AttachmentErrorLocation::Depth => write!(f, "depth attachment's texture view"), + } + } +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum ColorAttachmentError { + #[error("Attachment format {0:?} is not a color format")] + InvalidFormat(wgt::TextureFormat), + #[error("The number of color attachments {given} exceeds the limit {limit}")] + TooMany { given: usize, limit: usize }, +} + +/// Error encountered when performing a render pass. 
+#[derive(Clone, Debug, Error)] +pub enum RenderPassErrorInner { + #[error(transparent)] + Device(DeviceError), + #[error(transparent)] + ColorAttachment(#[from] ColorAttachmentError), + #[error(transparent)] + Encoder(#[from] CommandEncoderError), + #[error("Attachment texture view {0:?} is invalid")] + InvalidAttachment(id::TextureViewId), + #[error("Attachment texture view {0:?} is invalid")] + InvalidResolveTarget(id::TextureViewId), + #[error("The format of the depth-stencil attachment ({0:?}) is not a depth-stencil format")] + InvalidDepthStencilAttachmentFormat(wgt::TextureFormat), + #[error("The format of the {location} ({format:?}) is not resolvable")] + UnsupportedResolveTargetFormat { + location: AttachmentErrorLocation, + format: wgt::TextureFormat, + }, + #[error("No color attachments or depth attachments were provided, at least one attachment of any kind must be provided")] + MissingAttachments, + #[error("The {location} is not renderable:")] + TextureViewIsNotRenderable { + location: AttachmentErrorLocation, + #[source] + reason: TextureViewNotRenderableReason, + }, + #[error("Attachments have differing sizes: the {expected_location} has extent {expected_extent:?} but is followed by the {actual_location} which has {actual_extent:?}")] + AttachmentsDimensionMismatch { + expected_location: AttachmentErrorLocation, + expected_extent: wgt::Extent3d, + actual_location: AttachmentErrorLocation, + actual_extent: wgt::Extent3d, + }, + #[error("Attachments have differing sample counts: the {expected_location} has count {expected_samples:?} but is followed by the {actual_location} which has count {actual_samples:?}")] + AttachmentSampleCountMismatch { + expected_location: AttachmentErrorLocation, + expected_samples: u32, + actual_location: AttachmentErrorLocation, + actual_samples: u32, + }, + #[error("The resolve source, {location}, must be multi-sampled (has {src} samples) while the resolve destination must not be multisampled (has {dst} samples)")] + 
InvalidResolveSampleCounts { + location: AttachmentErrorLocation, + src: u32, + dst: u32, + }, + #[error( + "Resource source, {location}, format ({src:?}) must match the resolve destination format ({dst:?})" + )] + MismatchedResolveTextureFormat { + location: AttachmentErrorLocation, + src: wgt::TextureFormat, + dst: wgt::TextureFormat, + }, + #[error("Surface texture is dropped before the render pass is finished")] + SurfaceTextureDropped, + #[error("Not enough memory left for render pass")] + OutOfMemory, + #[error("The bind group at index {0:?} is invalid")] + InvalidBindGroup(usize), + #[error("Unable to clear non-present/read-only depth")] + InvalidDepthOps, + #[error("Unable to clear non-present/read-only stencil")] + InvalidStencilOps, + #[error("Setting `values_offset` to be `None` is only for internal use in render bundles")] + InvalidValuesOffset, + #[error(transparent)] + MissingFeatures(#[from] MissingFeatures), + #[error(transparent)] + MissingDownlevelFlags(#[from] MissingDownlevelFlags), + #[error("Indirect draw uses bytes {offset}..{end_offset} {} which overruns indirect buffer of size {buffer_size}", + count.map_or_else(String::new, |v| format!("(using count {v})")))] + IndirectBufferOverrun { + count: Option<NonZeroU32>, + offset: u64, + end_offset: u64, + buffer_size: u64, + }, + #[error("Indirect draw uses bytes {begin_count_offset}..{end_count_offset} which overruns indirect buffer of size {count_buffer_size}")] + IndirectCountBufferOverrun { + begin_count_offset: u64, + end_count_offset: u64, + count_buffer_size: u64, + }, + #[error("Cannot pop debug group, because number of pushed debug groups is zero")] + InvalidPopDebugGroup, + #[error(transparent)] + ResourceUsageConflict(#[from] UsageConflict), + #[error("Render bundle has incompatible targets, {0}")] + IncompatibleBundleTargets(#[from] RenderPassCompatibilityError), + #[error( + "Render bundle has incompatible read-only flags: \ + bundle has flags depth = {bundle_depth} and stencil = 
{bundle_stencil}, \ + while the pass has flags depth = {pass_depth} and stencil = {pass_stencil}. \ + Read-only renderpasses are only compatible with read-only bundles for that aspect." + )] + IncompatibleBundleReadOnlyDepthStencil { + pass_depth: bool, + pass_stencil: bool, + bundle_depth: bool, + bundle_stencil: bool, + }, + #[error(transparent)] + RenderCommand(#[from] RenderCommandError), + #[error(transparent)] + Draw(#[from] DrawError), + #[error(transparent)] + Bind(#[from] BindError), + #[error(transparent)] + QueryUse(#[from] QueryUseError), + #[error("Multiview layer count must match")] + MultiViewMismatch, + #[error( + "Multiview pass texture views with more than one array layer must have D2Array dimension" + )] + MultiViewDimensionMismatch, + #[error("QuerySet {0:?} is invalid")] + InvalidQuerySet(id::QuerySetId), + #[error("missing occlusion query set")] + MissingOcclusionQuerySet, +} + +impl PrettyError for RenderPassErrorInner { + fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { + fmt.error(self); + if let Self::InvalidAttachment(id) = *self { + fmt.texture_view_label_with_key(&id, "attachment"); + }; + if let Self::Draw(DrawError::IncompatibleBindGroup { diff, .. }) = self { + for d in diff { + fmt.note(&d); + } + }; + } +} + +impl From<MissingBufferUsageError> for RenderPassErrorInner { + fn from(error: MissingBufferUsageError) -> Self { + Self::RenderCommand(error.into()) + } +} + +impl From<MissingTextureUsageError> for RenderPassErrorInner { + fn from(error: MissingTextureUsageError) -> Self { + Self::RenderCommand(error.into()) + } +} + +impl From<DeviceError> for RenderPassErrorInner { + fn from(error: DeviceError) -> Self { + Self::Device(error) + } +} + +/// Error encountered when performing a render pass. 
+#[derive(Clone, Debug, Error)] +#[error("{scope}")] +pub struct RenderPassError { + pub scope: PassErrorScope, + #[source] + inner: RenderPassErrorInner, +} +impl PrettyError for RenderPassError { + fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { + // This error is wrapper for the inner error, + // but the scope has useful labels + fmt.error(self); + self.scope.fmt_pretty(fmt); + } +} + +impl<T, E> MapPassErr<T, RenderPassError> for Result<T, E> +where + E: Into<RenderPassErrorInner>, +{ + fn map_pass_err(self, scope: PassErrorScope) -> Result<T, RenderPassError> { + self.map_err(|inner| RenderPassError { + scope, + inner: inner.into(), + }) + } +} + +struct RenderAttachment<'a, A: HalApi> { + texture: Arc<Texture<A>>, + selector: &'a TextureSelector, + usage: hal::TextureUses, +} + +impl<A: HalApi> TextureView<A> { + fn to_render_attachment(&self, usage: hal::TextureUses) -> RenderAttachment<A> { + RenderAttachment { + texture: self.parent.clone(), + selector: &self.selector, + usage, + } + } +} + +const MAX_TOTAL_ATTACHMENTS: usize = hal::MAX_COLOR_ATTACHMENTS + hal::MAX_COLOR_ATTACHMENTS + 1; +type AttachmentDataVec<T> = ArrayVec<T, MAX_TOTAL_ATTACHMENTS>; + +struct RenderPassInfo<'a, A: HalApi> { + context: RenderPassContext, + usage_scope: UsageScope<A>, + /// All render attachments, including depth/stencil + render_attachments: AttachmentDataVec<RenderAttachment<'a, A>>, + is_depth_read_only: bool, + is_stencil_read_only: bool, + extent: wgt::Extent3d, + _phantom: PhantomData<A>, + + pending_discard_init_fixups: SurfacesInDiscardState<A>, + divergent_discarded_depth_stencil_aspect: Option<(wgt::TextureAspect, &'a TextureView<A>)>, + multiview: Option<NonZeroU32>, +} + +impl<'a, A: HalApi> RenderPassInfo<'a, A> { + fn add_pass_texture_init_actions<V>( + channel: &PassChannel<V>, + texture_memory_actions: &mut CommandBufferTextureMemoryActions<A>, + view: &TextureView<A>, + pending_discard_init_fixups: &mut SurfacesInDiscardState<A>, + ) { + if 
channel.load_op == LoadOp::Load { + pending_discard_init_fixups.extend(texture_memory_actions.register_init_action( + &TextureInitTrackerAction { + texture: view.parent.clone(), + range: TextureInitRange::from(view.selector.clone()), + // Note that this is needed even if the target is discarded, + kind: MemoryInitKind::NeedsInitializedMemory, + }, + )); + } else if channel.store_op == StoreOp::Store { + // Clear + Store + texture_memory_actions.register_implicit_init( + &view.parent, + TextureInitRange::from(view.selector.clone()), + ); + } + if channel.store_op == StoreOp::Discard { + // the discard happens at the *end* of a pass, but recording the + // discard right away be alright since the texture can't be used + // during the pass anyways + texture_memory_actions.discard(TextureSurfaceDiscard { + texture: view.parent.clone(), + mip_level: view.selector.mips.start, + layer: view.selector.layers.start, + }); + } + } + + fn start( + device: &Device<A>, + label: Option<&str>, + color_attachments: &[Option<RenderPassColorAttachment>], + depth_stencil_attachment: Option<&RenderPassDepthStencilAttachment>, + timestamp_writes: Option<&RenderPassTimestampWrites>, + occlusion_query_set: Option<id::QuerySetId>, + encoder: &mut CommandEncoder<A>, + trackers: &mut Tracker<A>, + texture_memory_actions: &mut CommandBufferTextureMemoryActions<A>, + pending_query_resets: &mut QueryResetMap<A>, + view_guard: &'a Storage<TextureView<A>>, + buffer_guard: &'a Storage<Buffer<A>>, + texture_guard: &'a Storage<Texture<A>>, + query_set_guard: &'a Storage<QuerySet<A>>, + snatch_guard: &SnatchGuard<'a>, + ) -> Result<Self, RenderPassErrorInner> { + profiling::scope!("RenderPassInfo::start"); + + // We default to false intentionally, even if depth-stencil isn't used at all. + // This allows us to use the primary raw pipeline in `RenderPipeline`, + // instead of the special read-only one, which would be `None`. 
+ let mut is_depth_read_only = false; + let mut is_stencil_read_only = false; + + let mut render_attachments = AttachmentDataVec::<RenderAttachment<A>>::new(); + let mut discarded_surfaces = AttachmentDataVec::new(); + let mut pending_discard_init_fixups = SurfacesInDiscardState::new(); + let mut divergent_discarded_depth_stencil_aspect = None; + + let mut attachment_location = AttachmentErrorLocation::Color { + index: usize::MAX, + resolve: false, + }; + let mut extent = None; + let mut sample_count = 0; + + let mut detected_multiview: Option<Option<NonZeroU32>> = None; + + let mut check_multiview = |view: &TextureView<A>| { + // Get the multiview configuration for this texture view + let layers = view.selector.layers.end - view.selector.layers.start; + let this_multiview = if layers >= 2 { + // Trivially proven by the if above + Some(unsafe { NonZeroU32::new_unchecked(layers) }) + } else { + None + }; + + // Make sure that if this view is a multiview, it is set to be an array + if this_multiview.is_some() && view.desc.dimension != TextureViewDimension::D2Array { + return Err(RenderPassErrorInner::MultiViewDimensionMismatch); + } + + // Validate matching first, or store the first one + if let Some(multiview) = detected_multiview { + if multiview != this_multiview { + return Err(RenderPassErrorInner::MultiViewMismatch); + } + } else { + // Multiview is only supported if the feature is enabled + if this_multiview.is_some() { + device.require_features(wgt::Features::MULTIVIEW)?; + } + + detected_multiview = Some(this_multiview); + } + + Ok(()) + }; + let mut add_view = |view: &TextureView<A>, location| { + let render_extent = view.render_extent.map_err(|reason| { + RenderPassErrorInner::TextureViewIsNotRenderable { location, reason } + })?; + if let Some(ex) = extent { + if ex != render_extent { + return Err(RenderPassErrorInner::AttachmentsDimensionMismatch { + expected_location: attachment_location, + expected_extent: ex, + actual_location: location, + 
actual_extent: render_extent, + }); + } + } else { + extent = Some(render_extent); + } + if sample_count == 0 { + sample_count = view.samples; + } else if sample_count != view.samples { + return Err(RenderPassErrorInner::AttachmentSampleCountMismatch { + expected_location: attachment_location, + expected_samples: sample_count, + actual_location: location, + actual_samples: view.samples, + }); + } + attachment_location = location; + Ok(()) + }; + + let mut colors = + ArrayVec::<Option<hal::ColorAttachment<A>>, { hal::MAX_COLOR_ATTACHMENTS }>::new(); + let mut depth_stencil = None; + + if let Some(at) = depth_stencil_attachment { + let view: &TextureView<A> = trackers + .views + .add_single(view_guard, at.view) + .ok_or(RenderPassErrorInner::InvalidAttachment(at.view))?; + check_multiview(view)?; + add_view(view, AttachmentErrorLocation::Depth)?; + + let ds_aspects = view.desc.aspects(); + if ds_aspects.contains(hal::FormatAspects::COLOR) { + return Err(RenderPassErrorInner::InvalidDepthStencilAttachmentFormat( + view.desc.format, + )); + } + + if !ds_aspects.contains(hal::FormatAspects::STENCIL) + || (at.stencil.load_op == at.depth.load_op + && at.stencil.store_op == at.depth.store_op) + { + Self::add_pass_texture_init_actions( + &at.depth, + texture_memory_actions, + view, + &mut pending_discard_init_fixups, + ); + } else if !ds_aspects.contains(hal::FormatAspects::DEPTH) { + Self::add_pass_texture_init_actions( + &at.stencil, + texture_memory_actions, + view, + &mut pending_discard_init_fixups, + ); + } else { + // This is the only place (anywhere in wgpu) where Stencil & + // Depth init state can diverge. + // + // To safe us the overhead of tracking init state of texture + // aspects everywhere, we're going to cheat a little bit in + // order to keep the init state of both Stencil and Depth + // aspects in sync. The expectation is that we hit this path + // extremely rarely! + // + // Diverging LoadOp, i.e. 
Load + Clear: + // + // Record MemoryInitKind::NeedsInitializedMemory for the entire + // surface, a bit wasteful on unit but no negative effect! + // + // Rationale: If the loaded channel is uninitialized it needs + // clearing, the cleared channel doesn't care. (If everything is + // already initialized nothing special happens) + // + // (possible minor optimization: Clear caused by + // NeedsInitializedMemory should know that it doesn't need to + // clear the aspect that was set to C) + let need_init_beforehand = + at.depth.load_op == LoadOp::Load || at.stencil.load_op == LoadOp::Load; + if need_init_beforehand { + pending_discard_init_fixups.extend( + texture_memory_actions.register_init_action(&TextureInitTrackerAction { + texture: view.parent.clone(), + range: TextureInitRange::from(view.selector.clone()), + kind: MemoryInitKind::NeedsInitializedMemory, + }), + ); + } + + // Diverging Store, i.e. Discard + Store: + // + // Immediately zero out channel that is set to discard after + // we're done with the render pass. This allows us to set the + // entire surface to MemoryInitKind::ImplicitlyInitialized (if + // it isn't already set to NeedsInitializedMemory). + // + // (possible optimization: Delay and potentially drop this zeroing) + if at.depth.store_op != at.stencil.store_op { + if !need_init_beforehand { + texture_memory_actions.register_implicit_init( + &view.parent, + TextureInitRange::from(view.selector.clone()), + ); + } + divergent_discarded_depth_stencil_aspect = Some(( + if at.depth.store_op == StoreOp::Discard { + wgt::TextureAspect::DepthOnly + } else { + wgt::TextureAspect::StencilOnly + }, + view, + )); + } else if at.depth.store_op == StoreOp::Discard { + // Both are discarded using the regular path. 
+ discarded_surfaces.push(TextureSurfaceDiscard { + texture: view.parent.clone(), + mip_level: view.selector.mips.start, + layer: view.selector.layers.start, + }); + } + } + + (is_depth_read_only, is_stencil_read_only) = at.depth_stencil_read_only(ds_aspects)?; + + let usage = if is_depth_read_only + && is_stencil_read_only + && device + .downlevel + .flags + .contains(wgt::DownlevelFlags::READ_ONLY_DEPTH_STENCIL) + { + hal::TextureUses::DEPTH_STENCIL_READ | hal::TextureUses::RESOURCE + } else { + hal::TextureUses::DEPTH_STENCIL_WRITE + }; + render_attachments.push(view.to_render_attachment(usage)); + + depth_stencil = Some(hal::DepthStencilAttachment { + target: hal::Attachment { + view: view + .raw(snatch_guard) + .ok_or_else(|| RenderPassErrorInner::InvalidAttachment(view.info.id()))?, + usage, + }, + depth_ops: at.depth.hal_ops(), + stencil_ops: at.stencil.hal_ops(), + clear_value: (at.depth.clear_value, at.stencil.clear_value), + }); + } + + for (index, attachment) in color_attachments.iter().enumerate() { + let at = if let Some(attachment) = attachment.as_ref() { + attachment + } else { + colors.push(None); + continue; + }; + let color_view: &TextureView<A> = trackers + .views + .add_single(view_guard, at.view) + .ok_or(RenderPassErrorInner::InvalidAttachment(at.view))?; + check_multiview(color_view)?; + add_view( + color_view, + AttachmentErrorLocation::Color { + index, + resolve: false, + }, + )?; + + if !color_view + .desc + .aspects() + .contains(hal::FormatAspects::COLOR) + { + return Err(RenderPassErrorInner::ColorAttachment( + ColorAttachmentError::InvalidFormat(color_view.desc.format), + )); + } + + Self::add_pass_texture_init_actions( + &at.channel, + texture_memory_actions, + color_view, + &mut pending_discard_init_fixups, + ); + render_attachments + .push(color_view.to_render_attachment(hal::TextureUses::COLOR_TARGET)); + + let mut hal_resolve_target = None; + if let Some(resolve_target) = at.resolve_target { + let resolve_view: &TextureView<A> = 
trackers + .views + .add_single(view_guard, resolve_target) + .ok_or(RenderPassErrorInner::InvalidAttachment(resolve_target))?; + + check_multiview(resolve_view)?; + + let resolve_location = AttachmentErrorLocation::Color { + index, + resolve: true, + }; + + let render_extent = resolve_view.render_extent.map_err(|reason| { + RenderPassErrorInner::TextureViewIsNotRenderable { + location: resolve_location, + reason, + } + })?; + if color_view.render_extent.unwrap() != render_extent { + return Err(RenderPassErrorInner::AttachmentsDimensionMismatch { + expected_location: attachment_location, + expected_extent: extent.unwrap_or_default(), + actual_location: resolve_location, + actual_extent: render_extent, + }); + } + if color_view.samples == 1 || resolve_view.samples != 1 { + return Err(RenderPassErrorInner::InvalidResolveSampleCounts { + location: resolve_location, + src: color_view.samples, + dst: resolve_view.samples, + }); + } + if color_view.desc.format != resolve_view.desc.format { + return Err(RenderPassErrorInner::MismatchedResolveTextureFormat { + location: resolve_location, + src: color_view.desc.format, + dst: resolve_view.desc.format, + }); + } + if !resolve_view + .format_features + .flags + .contains(wgt::TextureFormatFeatureFlags::MULTISAMPLE_RESOLVE) + { + return Err(RenderPassErrorInner::UnsupportedResolveTargetFormat { + location: resolve_location, + format: resolve_view.desc.format, + }); + } + + texture_memory_actions.register_implicit_init( + &resolve_view.parent, + TextureInitRange::from(resolve_view.selector.clone()), + ); + render_attachments + .push(resolve_view.to_render_attachment(hal::TextureUses::COLOR_TARGET)); + + hal_resolve_target = Some(hal::Attachment { + view: resolve_view.raw(snatch_guard).ok_or_else(|| { + RenderPassErrorInner::InvalidResolveTarget(resolve_view.info.id()) + })?, + usage: hal::TextureUses::COLOR_TARGET, + }); + } + + colors.push(Some(hal::ColorAttachment { + target: hal::Attachment { + view: 
color_view.raw(snatch_guard).ok_or_else(|| { + RenderPassErrorInner::InvalidAttachment(color_view.info.id()) + })?, + usage: hal::TextureUses::COLOR_TARGET, + }, + resolve_target: hal_resolve_target, + ops: at.channel.hal_ops(), + clear_value: at.channel.clear_value, + })); + } + + let extent = extent.ok_or(RenderPassErrorInner::MissingAttachments)?; + let multiview = detected_multiview.expect("Multiview was not detected, no attachments"); + + let view_data = AttachmentData { + colors: color_attachments + .iter() + .map(|at| at.as_ref().map(|at| view_guard.get(at.view).unwrap())) + .collect(), + resolves: color_attachments + .iter() + .filter_map(|at| match *at { + Some(RenderPassColorAttachment { + resolve_target: Some(resolve), + .. + }) => Some(view_guard.get(resolve).unwrap()), + _ => None, + }) + .collect(), + depth_stencil: depth_stencil_attachment.map(|at| view_guard.get(at.view).unwrap()), + }; + + let context = RenderPassContext { + attachments: view_data.map(|view| view.desc.format), + sample_count, + multiview, + }; + + let timestamp_writes = if let Some(tw) = timestamp_writes { + let query_set = trackers + .query_sets + .add_single(query_set_guard, tw.query_set) + .ok_or(RenderPassErrorInner::InvalidQuerySet(tw.query_set))?; + + if let Some(index) = tw.beginning_of_pass_write_index { + pending_query_resets.use_query_set(tw.query_set, query_set, index); + } + if let Some(index) = tw.end_of_pass_write_index { + pending_query_resets.use_query_set(tw.query_set, query_set, index); + } + + Some(hal::RenderPassTimestampWrites { + query_set: query_set.raw.as_ref().unwrap(), + beginning_of_pass_write_index: tw.beginning_of_pass_write_index, + end_of_pass_write_index: tw.end_of_pass_write_index, + }) + } else { + None + }; + + let occlusion_query_set = if let Some(occlusion_query_set) = occlusion_query_set { + let query_set = trackers + .query_sets + .add_single(query_set_guard, occlusion_query_set) + 
.ok_or(RenderPassErrorInner::InvalidQuerySet(occlusion_query_set))?; + + Some(query_set.raw.as_ref().unwrap()) + } else { + None + }; + + let hal_desc = hal::RenderPassDescriptor { + label: hal_label(label, device.instance_flags), + extent, + sample_count, + color_attachments: &colors, + depth_stencil_attachment: depth_stencil, + multiview, + timestamp_writes, + occlusion_query_set, + }; + unsafe { + encoder.raw.begin_render_pass(&hal_desc); + }; + + Ok(Self { + context, + usage_scope: UsageScope::new(buffer_guard, texture_guard), + render_attachments, + is_depth_read_only, + is_stencil_read_only, + extent, + _phantom: PhantomData, + pending_discard_init_fixups, + divergent_discarded_depth_stencil_aspect, + multiview, + }) + } + + fn finish( + mut self, + raw: &mut A::CommandEncoder, + snatch_guard: &SnatchGuard, + ) -> Result<(UsageScope<A>, SurfacesInDiscardState<A>), RenderPassErrorInner> { + profiling::scope!("RenderPassInfo::finish"); + unsafe { + raw.end_render_pass(); + } + + for ra in self.render_attachments { + let texture = &ra.texture; + check_texture_usage(texture.desc.usage, TextureUsages::RENDER_ATTACHMENT)?; + + // the tracker set of the pass is always in "extend" mode + unsafe { + self.usage_scope + .textures + .merge_single(texture, Some(ra.selector.clone()), ra.usage) + .map_err(UsageConflict::from)? + }; + } + + // If either only stencil or depth was discarded, we put in a special + // clear pass to keep the init status of the aspects in sync. We do this + // so we don't need to track init state for depth/stencil aspects + // individually. + // + // Note that we don't go the usual route of "brute force" initializing + // the texture when need arises here, since this path is actually + // something a user may genuinely want (where as the other cases are + // more seen along the lines as gracefully handling a user error). 
+ if let Some((aspect, view)) = self.divergent_discarded_depth_stencil_aspect { + let (depth_ops, stencil_ops) = if aspect == wgt::TextureAspect::DepthOnly { + ( + hal::AttachmentOps::STORE, // clear depth + hal::AttachmentOps::LOAD | hal::AttachmentOps::STORE, // unchanged stencil + ) + } else { + ( + hal::AttachmentOps::LOAD | hal::AttachmentOps::STORE, // unchanged stencil + hal::AttachmentOps::STORE, // clear depth + ) + }; + let desc = hal::RenderPassDescriptor { + label: Some("(wgpu internal) Zero init discarded depth/stencil aspect"), + extent: view.render_extent.unwrap(), + sample_count: view.samples, + color_attachments: &[], + depth_stencil_attachment: Some(hal::DepthStencilAttachment { + target: hal::Attachment { + view: view.raw(snatch_guard).ok_or_else(|| { + RenderPassErrorInner::InvalidAttachment(view.info.id()) + })?, + usage: hal::TextureUses::DEPTH_STENCIL_WRITE, + }, + depth_ops, + stencil_ops, + clear_value: (0.0, 0), + }), + multiview: self.multiview, + timestamp_writes: None, + occlusion_query_set: None, + }; + unsafe { + raw.begin_render_pass(&desc); + raw.end_render_pass(); + } + } + + Ok((self.usage_scope, self.pending_discard_init_fixups)) + } +} + +// Common routines between render/compute + +impl Global { + pub fn command_encoder_run_render_pass<A: HalApi>( + &self, + encoder_id: id::CommandEncoderId, + pass: &RenderPass, + ) -> Result<(), RenderPassError> { + self.command_encoder_run_render_pass_impl::<A>( + encoder_id, + pass.base.as_ref(), + &pass.color_targets, + pass.depth_stencil_target.as_ref(), + pass.timestamp_writes.as_ref(), + pass.occlusion_query_set_id, + ) + } + + #[doc(hidden)] + pub fn command_encoder_run_render_pass_impl<A: HalApi>( + &self, + encoder_id: id::CommandEncoderId, + base: BasePassRef<RenderCommand>, + color_attachments: &[Option<RenderPassColorAttachment>], + depth_stencil_attachment: Option<&RenderPassDepthStencilAttachment>, + timestamp_writes: Option<&RenderPassTimestampWrites>, + occlusion_query_set_id: 
Option<id::QuerySetId>, + ) -> Result<(), RenderPassError> { + profiling::scope!( + "CommandEncoder::run_render_pass {}", + base.label.unwrap_or("") + ); + + let discard_hal_labels = self + .instance + .flags + .contains(wgt::InstanceFlags::DISCARD_HAL_LABELS); + let label = hal_label(base.label, self.instance.flags); + + let pass_scope = PassErrorScope::Pass(encoder_id); + + let hub = A::hub(self); + + let cmd_buf = CommandBuffer::get_encoder(hub, encoder_id).map_pass_err(pass_scope)?; + let device = &cmd_buf.device; + let snatch_guard = device.snatchable_lock.read(); + + let (scope, pending_discard_init_fixups) = { + let mut cmd_buf_data = cmd_buf.data.lock(); + let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf_data.commands { + list.push(crate::device::trace::Command::RunRenderPass { + base: BasePass::from_ref(base), + target_colors: color_attachments.to_vec(), + target_depth_stencil: depth_stencil_attachment.cloned(), + timestamp_writes: timestamp_writes.cloned(), + occlusion_query_set_id, + }); + } + + if !device.is_valid() { + return Err(DeviceError::Lost).map_pass_err(pass_scope); + } + + let encoder = &mut cmd_buf_data.encoder; + let status = &mut cmd_buf_data.status; + let tracker = &mut cmd_buf_data.trackers; + let buffer_memory_init_actions = &mut cmd_buf_data.buffer_memory_init_actions; + let texture_memory_actions = &mut cmd_buf_data.texture_memory_actions; + let pending_query_resets = &mut cmd_buf_data.pending_query_resets; + + // We automatically keep extending command buffers over time, and because + // we want to insert a command buffer _before_ what we're about to record, + // we need to make sure to close the previous one. + encoder.close().map_pass_err(pass_scope)?; + // We will reset this to `Recording` if we succeed, acts as a fail-safe. 
+ *status = CommandEncoderStatus::Error; + encoder.open_pass(label).map_pass_err(pass_scope)?; + + let bundle_guard = hub.render_bundles.read(); + let bind_group_guard = hub.bind_groups.read(); + let render_pipeline_guard = hub.render_pipelines.read(); + let query_set_guard = hub.query_sets.read(); + let buffer_guard = hub.buffers.read(); + let texture_guard = hub.textures.read(); + let view_guard = hub.texture_views.read(); + + log::trace!( + "Encoding render pass begin in command buffer {:?}", + encoder_id + ); + + let mut info = RenderPassInfo::start( + device, + label, + color_attachments, + depth_stencil_attachment, + timestamp_writes, + occlusion_query_set_id, + encoder, + tracker, + texture_memory_actions, + pending_query_resets, + &*view_guard, + &*buffer_guard, + &*texture_guard, + &*query_set_guard, + &snatch_guard, + ) + .map_pass_err(pass_scope)?; + + tracker.set_size( + Some(&*buffer_guard), + Some(&*texture_guard), + Some(&*view_guard), + None, + Some(&*bind_group_guard), + None, + Some(&*render_pipeline_guard), + Some(&*bundle_guard), + Some(&*query_set_guard), + ); + + let raw = &mut encoder.raw; + + let mut state = State { + pipeline_flags: PipelineFlags::empty(), + binder: Binder::new(), + blend_constant: OptionalState::Unused, + stencil_reference: 0, + pipeline: None, + index: IndexState::default(), + vertex: VertexState::default(), + debug_scope_depth: 0, + }; + let mut temp_offsets = Vec::new(); + let mut dynamic_offset_count = 0; + let mut string_offset = 0; + let mut active_query = None; + + for command in base.commands { + match *command { + RenderCommand::SetBindGroup { + index, + num_dynamic_offsets, + bind_group_id, + } => { + api_log!("RenderPass::set_bind_group {index} {bind_group_id:?}"); + + let scope = PassErrorScope::SetBindGroup(bind_group_id); + let max_bind_groups = device.limits.max_bind_groups; + if index >= max_bind_groups { + return Err(RenderCommandError::BindGroupIndexOutOfRange { + index, + max: max_bind_groups, + }) + 
.map_pass_err(scope); + } + + temp_offsets.clear(); + temp_offsets.extend_from_slice( + &base.dynamic_offsets + [dynamic_offset_count..dynamic_offset_count + num_dynamic_offsets], + ); + dynamic_offset_count += num_dynamic_offsets; + + let bind_group = tracker + .bind_groups + .add_single(&*bind_group_guard, bind_group_id) + .ok_or(RenderCommandError::InvalidBindGroup(bind_group_id)) + .map_pass_err(scope)?; + + if bind_group.device.as_info().id() != device.as_info().id() { + return Err(DeviceError::WrongDevice).map_pass_err(scope); + } + + bind_group + .validate_dynamic_bindings(index, &temp_offsets, &cmd_buf.limits) + .map_pass_err(scope)?; + + // merge the resource tracker in + unsafe { + info.usage_scope + .merge_bind_group(&bind_group.used) + .map_pass_err(scope)?; + } + //Note: stateless trackers are not merged: the lifetime reference + // is held to the bind group itself. + + buffer_memory_init_actions.extend( + bind_group.used_buffer_ranges.iter().filter_map(|action| { + action + .buffer + .initialization_status + .read() + .check_action(action) + }), + ); + for action in bind_group.used_texture_ranges.iter() { + info.pending_discard_init_fixups + .extend(texture_memory_actions.register_init_action(action)); + } + + let pipeline_layout = state.binder.pipeline_layout.clone(); + let entries = + state + .binder + .assign_group(index as usize, bind_group, &temp_offsets); + if !entries.is_empty() && pipeline_layout.is_some() { + let pipeline_layout = pipeline_layout.as_ref().unwrap().raw(); + for (i, e) in entries.iter().enumerate() { + if let Some(group) = e.group.as_ref() { + let raw_bg = group + .raw(&snatch_guard) + .ok_or(RenderPassErrorInner::InvalidBindGroup(i)) + .map_pass_err(scope)?; + unsafe { + raw.set_bind_group( + pipeline_layout, + index + i as u32, + raw_bg, + &e.dynamic_offsets, + ); + } + } + } + } + } + RenderCommand::SetPipeline(pipeline_id) => { + api_log!("RenderPass::set_pipeline {pipeline_id:?}"); + + let scope = 
PassErrorScope::SetPipelineRender(pipeline_id); + state.pipeline = Some(pipeline_id); + + let pipeline: &pipeline::RenderPipeline<A> = tracker + .render_pipelines + .add_single(&*render_pipeline_guard, pipeline_id) + .ok_or(RenderCommandError::InvalidPipeline(pipeline_id)) + .map_pass_err(scope)?; + + if pipeline.device.as_info().id() != device.as_info().id() { + return Err(DeviceError::WrongDevice).map_pass_err(scope); + } + + info.context + .check_compatible( + &pipeline.pass_context, + RenderPassCompatibilityCheckType::RenderPipeline, + ) + .map_err(RenderCommandError::IncompatiblePipelineTargets) + .map_pass_err(scope)?; + + state.pipeline_flags = pipeline.flags; + + if (pipeline.flags.contains(PipelineFlags::WRITES_DEPTH) + && info.is_depth_read_only) + || (pipeline.flags.contains(PipelineFlags::WRITES_STENCIL) + && info.is_stencil_read_only) + { + return Err(RenderCommandError::IncompatiblePipelineRods) + .map_pass_err(scope); + } + + state + .blend_constant + .require(pipeline.flags.contains(PipelineFlags::BLEND_CONSTANT)); + + unsafe { + raw.set_render_pipeline(pipeline.raw()); + } + + if pipeline.flags.contains(PipelineFlags::STENCIL_REFERENCE) { + unsafe { + raw.set_stencil_reference(state.stencil_reference); + } + } + + // Rebind resource + if state.binder.pipeline_layout.is_none() + || !state + .binder + .pipeline_layout + .as_ref() + .unwrap() + .is_equal(&pipeline.layout) + { + let (start_index, entries) = state.binder.change_pipeline_layout( + &pipeline.layout, + &pipeline.late_sized_buffer_groups, + ); + if !entries.is_empty() { + for (i, e) in entries.iter().enumerate() { + if let Some(group) = e.group.as_ref() { + let raw_bg = group + .raw(&snatch_guard) + .ok_or(RenderPassErrorInner::InvalidBindGroup(i)) + .map_pass_err(scope)?; + unsafe { + raw.set_bind_group( + pipeline.layout.raw(), + start_index as u32 + i as u32, + raw_bg, + &e.dynamic_offsets, + ); + } + } + } + } + + // Clear push constant ranges + let non_overlapping = 
super::bind::compute_nonoverlapping_ranges( + &pipeline.layout.push_constant_ranges, + ); + for range in non_overlapping { + let offset = range.range.start; + let size_bytes = range.range.end - offset; + super::push_constant_clear( + offset, + size_bytes, + |clear_offset, clear_data| unsafe { + raw.set_push_constants( + pipeline.layout.raw(), + range.stages, + clear_offset, + clear_data, + ); + }, + ); + } + } + + state.index.pipeline_format = pipeline.strip_index_format; + + let vertex_steps_len = pipeline.vertex_steps.len(); + state.vertex.buffers_required = vertex_steps_len as u32; + + // Initialize each `vertex.inputs[i].step` from + // `pipeline.vertex_steps[i]`. Enlarge `vertex.inputs` + // as necessary to accommodate all slots in the + // pipeline. If `vertex.inputs` is longer, fill the + // extra entries with default `VertexStep`s. + while state.vertex.inputs.len() < vertex_steps_len { + state.vertex.inputs.push(VertexBufferState::EMPTY); + } + + // This is worse as a `zip`, but it's close. + let mut steps = pipeline.vertex_steps.iter(); + for input in state.vertex.inputs.iter_mut() { + input.step = steps.next().cloned().unwrap_or_default(); + } + + // Update vertex buffer limits. 
+ state.vertex.update_limits(); + } + RenderCommand::SetIndexBuffer { + buffer_id, + index_format, + offset, + size, + } => { + api_log!("RenderPass::set_index_buffer {buffer_id:?}"); + + let scope = PassErrorScope::SetIndexBuffer(buffer_id); + let buffer = info + .usage_scope + .buffers + .merge_single(&*buffer_guard, buffer_id, hal::BufferUses::INDEX) + .map_pass_err(scope)?; + + if buffer.device.as_info().id() != device.as_info().id() { + return Err(DeviceError::WrongDevice).map_pass_err(scope); + } + + check_buffer_usage(buffer.usage, BufferUsages::INDEX) + .map_pass_err(scope)?; + let buf_raw = buffer + .raw + .get(&snatch_guard) + .ok_or(RenderCommandError::DestroyedBuffer(buffer_id)) + .map_pass_err(scope)?; + + let end = match size { + Some(s) => offset + s.get(), + None => buffer.size, + }; + state.index.bound_buffer_view = Some((buffer_id, offset..end)); + + state.index.format = Some(index_format); + state.index.update_limit(); + + buffer_memory_init_actions.extend( + buffer.initialization_status.read().create_action( + buffer, + offset..end, + MemoryInitKind::NeedsInitializedMemory, + ), + ); + + let bb = hal::BufferBinding { + buffer: buf_raw, + offset, + size, + }; + unsafe { + raw.set_index_buffer(bb, index_format); + } + } + RenderCommand::SetVertexBuffer { + slot, + buffer_id, + offset, + size, + } => { + api_log!("RenderPass::set_vertex_buffer {slot} {buffer_id:?}"); + + let scope = PassErrorScope::SetVertexBuffer(buffer_id); + let buffer = info + .usage_scope + .buffers + .merge_single(&*buffer_guard, buffer_id, hal::BufferUses::VERTEX) + .map_pass_err(scope)?; + + if buffer.device.as_info().id() != device.as_info().id() { + return Err(DeviceError::WrongDevice).map_pass_err(scope); + } + + let max_vertex_buffers = device.limits.max_vertex_buffers; + if slot >= max_vertex_buffers { + return Err(RenderCommandError::VertexBufferIndexOutOfRange { + index: slot, + max: max_vertex_buffers, + }) + .map_pass_err(scope); + } + + 
check_buffer_usage(buffer.usage, BufferUsages::VERTEX) + .map_pass_err(scope)?; + let buf_raw = buffer + .raw + .get(&snatch_guard) + .ok_or(RenderCommandError::DestroyedBuffer(buffer_id)) + .map_pass_err(scope)?; + + let empty_slots = + (1 + slot as usize).saturating_sub(state.vertex.inputs.len()); + state + .vertex + .inputs + .extend(iter::repeat(VertexBufferState::EMPTY).take(empty_slots)); + let vertex_state = &mut state.vertex.inputs[slot as usize]; + //TODO: where are we checking that the offset is in bound? + vertex_state.total_size = match size { + Some(s) => s.get(), + None => buffer.size - offset, + }; + vertex_state.bound = true; + + buffer_memory_init_actions.extend( + buffer.initialization_status.read().create_action( + buffer, + offset..(offset + vertex_state.total_size), + MemoryInitKind::NeedsInitializedMemory, + ), + ); + + let bb = hal::BufferBinding { + buffer: buf_raw, + offset, + size, + }; + unsafe { + raw.set_vertex_buffer(slot, bb); + } + state.vertex.update_limits(); + } + RenderCommand::SetBlendConstant(ref color) => { + api_log!("RenderPass::set_blend_constant"); + + state.blend_constant = OptionalState::Set; + let array = [ + color.r as f32, + color.g as f32, + color.b as f32, + color.a as f32, + ]; + unsafe { + raw.set_blend_constants(&array); + } + } + RenderCommand::SetStencilReference(value) => { + api_log!("RenderPass::set_stencil_reference {value}"); + + state.stencil_reference = value; + if state + .pipeline_flags + .contains(PipelineFlags::STENCIL_REFERENCE) + { + unsafe { + raw.set_stencil_reference(value); + } + } + } + RenderCommand::SetViewport { + ref rect, + depth_min, + depth_max, + } => { + api_log!("RenderPass::set_viewport {rect:?}"); + + let scope = PassErrorScope::SetViewport; + if rect.x < 0.0 + || rect.y < 0.0 + || rect.w <= 0.0 + || rect.h <= 0.0 + || rect.x + rect.w > info.extent.width as f32 + || rect.y + rect.h > info.extent.height as f32 + { + return Err(RenderCommandError::InvalidViewportRect( + *rect, + 
info.extent, + )) + .map_pass_err(scope); + } + if !(0.0..=1.0).contains(&depth_min) || !(0.0..=1.0).contains(&depth_max) { + return Err(RenderCommandError::InvalidViewportDepth( + depth_min, depth_max, + )) + .map_pass_err(scope); + } + let r = hal::Rect { + x: rect.x, + y: rect.y, + w: rect.w, + h: rect.h, + }; + unsafe { + raw.set_viewport(&r, depth_min..depth_max); + } + } + RenderCommand::SetPushConstant { + stages, + offset, + size_bytes, + values_offset, + } => { + api_log!("RenderPass::set_push_constants"); + + let scope = PassErrorScope::SetPushConstant; + let values_offset = values_offset + .ok_or(RenderPassErrorInner::InvalidValuesOffset) + .map_pass_err(scope)?; + + let end_offset_bytes = offset + size_bytes; + let values_end_offset = + (values_offset + size_bytes / wgt::PUSH_CONSTANT_ALIGNMENT) as usize; + let data_slice = + &base.push_constant_data[(values_offset as usize)..values_end_offset]; + + let pipeline_layout = state + .binder + .pipeline_layout + .as_ref() + .ok_or(DrawError::MissingPipeline) + .map_pass_err(scope)?; + + pipeline_layout + .validate_push_constant_ranges(stages, offset, end_offset_bytes) + .map_err(RenderCommandError::from) + .map_pass_err(scope)?; + + unsafe { + raw.set_push_constants( + pipeline_layout.raw(), + stages, + offset, + data_slice, + ) + } + } + RenderCommand::SetScissor(ref rect) => { + api_log!("RenderPass::set_scissor_rect {rect:?}"); + + let scope = PassErrorScope::SetScissorRect; + if rect.x + rect.w > info.extent.width + || rect.y + rect.h > info.extent.height + { + return Err(RenderCommandError::InvalidScissorRect(*rect, info.extent)) + .map_pass_err(scope); + } + let r = hal::Rect { + x: rect.x, + y: rect.y, + w: rect.w, + h: rect.h, + }; + unsafe { + raw.set_scissor_rect(&r); + } + } + RenderCommand::Draw { + vertex_count, + instance_count, + first_vertex, + first_instance, + } => { + api_log!( + "RenderPass::draw {vertex_count} {instance_count} {first_vertex} {first_instance}" + ); + + let indexed = 
false; + let scope = PassErrorScope::Draw { + indexed, + indirect: false, + pipeline: state.pipeline, + }; + state.is_ready(indexed).map_pass_err(scope)?; + + let last_vertex = first_vertex as u64 + vertex_count as u64; + let vertex_limit = state.vertex.vertex_limit; + if last_vertex > vertex_limit { + return Err(DrawError::VertexBeyondLimit { + last_vertex, + vertex_limit, + slot: state.vertex.vertex_limit_slot, + }) + .map_pass_err(scope); + } + let last_instance = first_instance as u64 + instance_count as u64; + let instance_limit = state.vertex.instance_limit; + if last_instance > instance_limit { + return Err(DrawError::InstanceBeyondLimit { + last_instance, + instance_limit, + slot: state.vertex.instance_limit_slot, + }) + .map_pass_err(scope); + } + + unsafe { + if instance_count > 0 && vertex_count > 0 { + raw.draw( + first_vertex, + vertex_count, + first_instance, + instance_count, + ); + } + } + } + RenderCommand::DrawIndexed { + index_count, + instance_count, + first_index, + base_vertex, + first_instance, + } => { + api_log!("RenderPass::draw_indexed {index_count} {instance_count} {first_index} {base_vertex} {first_instance}"); + + let indexed = true; + let scope = PassErrorScope::Draw { + indexed, + indirect: false, + pipeline: state.pipeline, + }; + state.is_ready(indexed).map_pass_err(scope)?; + + let last_index = first_index as u64 + index_count as u64; + let index_limit = state.index.limit; + if last_index > index_limit { + return Err(DrawError::IndexBeyondLimit { + last_index, + index_limit, + }) + .map_pass_err(scope); + } + let last_instance = first_instance as u64 + instance_count as u64; + let instance_limit = state.vertex.instance_limit; + if last_instance > instance_limit { + return Err(DrawError::InstanceBeyondLimit { + last_instance, + instance_limit, + slot: state.vertex.instance_limit_slot, + }) + .map_pass_err(scope); + } + + unsafe { + if instance_count > 0 && index_count > 0 { + raw.draw_indexed( + first_index, + index_count, + 
base_vertex, + first_instance, + instance_count, + ); + } + } + } + RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count, + indexed, + } => { + api_log!("RenderPass::draw_indirect (indexed:{indexed}) {buffer_id:?} {offset} {count:?}"); + + let scope = PassErrorScope::Draw { + indexed, + indirect: true, + pipeline: state.pipeline, + }; + state.is_ready(indexed).map_pass_err(scope)?; + + let stride = match indexed { + false => mem::size_of::<wgt::DrawIndirectArgs>(), + true => mem::size_of::<wgt::DrawIndexedIndirectArgs>(), + }; + + if count.is_some() { + device + .require_features(wgt::Features::MULTI_DRAW_INDIRECT) + .map_pass_err(scope)?; + } + device + .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION) + .map_pass_err(scope)?; + + let indirect_buffer = info + .usage_scope + .buffers + .merge_single(&*buffer_guard, buffer_id, hal::BufferUses::INDIRECT) + .map_pass_err(scope)?; + check_buffer_usage(indirect_buffer.usage, BufferUsages::INDIRECT) + .map_pass_err(scope)?; + let indirect_raw = indirect_buffer + .raw + .get(&snatch_guard) + .ok_or(RenderCommandError::DestroyedBuffer(buffer_id)) + .map_pass_err(scope)?; + + let actual_count = count.map_or(1, |c| c.get()); + + let end_offset = offset + stride as u64 * actual_count as u64; + if end_offset > indirect_buffer.size { + return Err(RenderPassErrorInner::IndirectBufferOverrun { + count, + offset, + end_offset, + buffer_size: indirect_buffer.size, + }) + .map_pass_err(scope); + } + + buffer_memory_init_actions.extend( + indirect_buffer.initialization_status.read().create_action( + indirect_buffer, + offset..end_offset, + MemoryInitKind::NeedsInitializedMemory, + ), + ); + + match indexed { + false => unsafe { + raw.draw_indirect(indirect_raw, offset, actual_count); + }, + true => unsafe { + raw.draw_indexed_indirect(indirect_raw, offset, actual_count); + }, + } + } + RenderCommand::MultiDrawIndirectCount { + buffer_id, + offset, + count_buffer_id, + count_buffer_offset, + max_count, + 
indexed, + } => { + api_log!("RenderPass::multi_draw_indirect_count (indexed:{indexed}) {buffer_id:?} {offset} {count_buffer_id:?} {count_buffer_offset:?} {max_count:?}"); + + let scope = PassErrorScope::Draw { + indexed, + indirect: true, + pipeline: state.pipeline, + }; + state.is_ready(indexed).map_pass_err(scope)?; + + let stride = match indexed { + false => mem::size_of::<wgt::DrawIndirectArgs>(), + true => mem::size_of::<wgt::DrawIndexedIndirectArgs>(), + } as u64; + + device + .require_features(wgt::Features::MULTI_DRAW_INDIRECT_COUNT) + .map_pass_err(scope)?; + device + .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION) + .map_pass_err(scope)?; + + let indirect_buffer = info + .usage_scope + .buffers + .merge_single(&*buffer_guard, buffer_id, hal::BufferUses::INDIRECT) + .map_pass_err(scope)?; + check_buffer_usage(indirect_buffer.usage, BufferUsages::INDIRECT) + .map_pass_err(scope)?; + let indirect_raw = indirect_buffer + .raw + .get(&snatch_guard) + .ok_or(RenderCommandError::DestroyedBuffer(buffer_id)) + .map_pass_err(scope)?; + + let count_buffer = info + .usage_scope + .buffers + .merge_single( + &*buffer_guard, + count_buffer_id, + hal::BufferUses::INDIRECT, + ) + .map_pass_err(scope)?; + check_buffer_usage(count_buffer.usage, BufferUsages::INDIRECT) + .map_pass_err(scope)?; + let count_raw = count_buffer + .raw + .get(&snatch_guard) + .ok_or(RenderCommandError::DestroyedBuffer(count_buffer_id)) + .map_pass_err(scope)?; + + let end_offset = offset + stride * max_count as u64; + if end_offset > indirect_buffer.size { + return Err(RenderPassErrorInner::IndirectBufferOverrun { + count: None, + offset, + end_offset, + buffer_size: indirect_buffer.size, + }) + .map_pass_err(scope); + } + buffer_memory_init_actions.extend( + indirect_buffer.initialization_status.read().create_action( + indirect_buffer, + offset..end_offset, + MemoryInitKind::NeedsInitializedMemory, + ), + ); + + let begin_count_offset = count_buffer_offset; + let 
end_count_offset = count_buffer_offset + 4; + if end_count_offset > count_buffer.size { + return Err(RenderPassErrorInner::IndirectCountBufferOverrun { + begin_count_offset, + end_count_offset, + count_buffer_size: count_buffer.size, + }) + .map_pass_err(scope); + } + buffer_memory_init_actions.extend( + count_buffer.initialization_status.read().create_action( + count_buffer, + count_buffer_offset..end_count_offset, + MemoryInitKind::NeedsInitializedMemory, + ), + ); + + match indexed { + false => unsafe { + raw.draw_indirect_count( + indirect_raw, + offset, + count_raw, + count_buffer_offset, + max_count, + ); + }, + true => unsafe { + raw.draw_indexed_indirect_count( + indirect_raw, + offset, + count_raw, + count_buffer_offset, + max_count, + ); + }, + } + } + RenderCommand::PushDebugGroup { color: _, len } => { + state.debug_scope_depth += 1; + if !discard_hal_labels { + let label = str::from_utf8( + &base.string_data[string_offset..string_offset + len], + ) + .unwrap(); + + api_log!("RenderPass::push_debug_group {label:?}"); + unsafe { + raw.begin_debug_marker(label); + } + } + string_offset += len; + } + RenderCommand::PopDebugGroup => { + api_log!("RenderPass::pop_debug_group"); + + let scope = PassErrorScope::PopDebugGroup; + if state.debug_scope_depth == 0 { + return Err(RenderPassErrorInner::InvalidPopDebugGroup) + .map_pass_err(scope); + } + state.debug_scope_depth -= 1; + if !discard_hal_labels { + unsafe { + raw.end_debug_marker(); + } + } + } + RenderCommand::InsertDebugMarker { color: _, len } => { + if !discard_hal_labels { + let label = str::from_utf8( + &base.string_data[string_offset..string_offset + len], + ) + .unwrap(); + api_log!("RenderPass::insert_debug_marker {label:?}"); + unsafe { + raw.insert_debug_marker(label); + } + } + string_offset += len; + } + RenderCommand::WriteTimestamp { + query_set_id, + query_index, + } => { + api_log!("RenderPass::write_timestamps {query_set_id:?} {query_index}"); + let scope = 
PassErrorScope::WriteTimestamp; + + device + .require_features(wgt::Features::TIMESTAMP_QUERY_INSIDE_PASSES) + .map_pass_err(scope)?; + + let query_set = tracker + .query_sets + .add_single(&*query_set_guard, query_set_id) + .ok_or(RenderCommandError::InvalidQuerySet(query_set_id)) + .map_pass_err(scope)?; + + query_set + .validate_and_write_timestamp( + raw, + query_set_id, + query_index, + Some(&mut cmd_buf_data.pending_query_resets), + ) + .map_pass_err(scope)?; + } + RenderCommand::BeginOcclusionQuery { query_index } => { + api_log!("RenderPass::begin_occlusion_query {query_index}"); + let scope = PassErrorScope::BeginOcclusionQuery; + + let query_set_id = occlusion_query_set_id + .ok_or(RenderPassErrorInner::MissingOcclusionQuerySet) + .map_pass_err(scope)?; + + let query_set = tracker + .query_sets + .add_single(&*query_set_guard, query_set_id) + .ok_or(RenderCommandError::InvalidQuerySet(query_set_id)) + .map_pass_err(scope)?; + + query_set + .validate_and_begin_occlusion_query( + raw, + query_set_id, + query_index, + Some(&mut cmd_buf_data.pending_query_resets), + &mut active_query, + ) + .map_pass_err(scope)?; + } + RenderCommand::EndOcclusionQuery => { + api_log!("RenderPass::end_occlusion_query"); + let scope = PassErrorScope::EndOcclusionQuery; + + end_occlusion_query(raw, &*query_set_guard, &mut active_query) + .map_pass_err(scope)?; + } + RenderCommand::BeginPipelineStatisticsQuery { + query_set_id, + query_index, + } => { + api_log!("RenderPass::begin_pipeline_statistics_query {query_set_id:?} {query_index}"); + let scope = PassErrorScope::BeginPipelineStatisticsQuery; + + let query_set = tracker + .query_sets + .add_single(&*query_set_guard, query_set_id) + .ok_or(RenderCommandError::InvalidQuerySet(query_set_id)) + .map_pass_err(scope)?; + + query_set + .validate_and_begin_pipeline_statistics_query( + raw, + query_set_id, + query_index, + Some(&mut cmd_buf_data.pending_query_resets), + &mut active_query, + ) + .map_pass_err(scope)?; + } + 
RenderCommand::EndPipelineStatisticsQuery => { + api_log!("RenderPass::end_pipeline_statistics_query"); + let scope = PassErrorScope::EndPipelineStatisticsQuery; + + end_pipeline_statistics_query(raw, &*query_set_guard, &mut active_query) + .map_pass_err(scope)?; + } + RenderCommand::ExecuteBundle(bundle_id) => { + api_log!("RenderPass::execute_bundle {bundle_id:?}"); + let scope = PassErrorScope::ExecuteBundle; + let bundle: &command::RenderBundle<A> = tracker + .bundles + .add_single(&*bundle_guard, bundle_id) + .ok_or(RenderCommandError::InvalidRenderBundle(bundle_id)) + .map_pass_err(scope)?; + + if bundle.device.as_info().id() != device.as_info().id() { + return Err(DeviceError::WrongDevice).map_pass_err(scope); + } + + info.context + .check_compatible( + &bundle.context, + RenderPassCompatibilityCheckType::RenderBundle, + ) + .map_err(RenderPassErrorInner::IncompatibleBundleTargets) + .map_pass_err(scope)?; + + if (info.is_depth_read_only && !bundle.is_depth_read_only) + || (info.is_stencil_read_only && !bundle.is_stencil_read_only) + { + return Err( + RenderPassErrorInner::IncompatibleBundleReadOnlyDepthStencil { + pass_depth: info.is_depth_read_only, + pass_stencil: info.is_stencil_read_only, + bundle_depth: bundle.is_depth_read_only, + bundle_stencil: bundle.is_stencil_read_only, + }, + ) + .map_pass_err(scope); + } + + buffer_memory_init_actions.extend( + bundle + .buffer_memory_init_actions + .iter() + .filter_map(|action| { + action + .buffer + .initialization_status + .read() + .check_action(action) + }), + ); + for action in bundle.texture_memory_init_actions.iter() { + info.pending_discard_init_fixups + .extend(texture_memory_actions.register_init_action(action)); + } + + unsafe { bundle.execute(raw) } + .map_err(|e| match e { + ExecutionError::DestroyedBuffer(id) => { + RenderCommandError::DestroyedBuffer(id) + } + ExecutionError::InvalidBindGroup(id) => { + RenderCommandError::InvalidBindGroup(id) + } + ExecutionError::Unimplemented(what) => { + 
RenderCommandError::Unimplemented(what) + } + }) + .map_pass_err(scope)?; + + unsafe { + info.usage_scope + .merge_render_bundle(&bundle.used) + .map_pass_err(scope)?; + tracker + .add_from_render_bundle(&bundle.used) + .map_pass_err(scope)?; + }; + state.reset_bundle(); + } + } + } + + log::trace!("Merging renderpass into cmd_buf {:?}", encoder_id); + let (trackers, pending_discard_init_fixups) = + info.finish(raw, &snatch_guard).map_pass_err(pass_scope)?; + + encoder.close().map_pass_err(pass_scope)?; + (trackers, pending_discard_init_fixups) + }; + + let cmd_buf = hub.command_buffers.get(encoder_id.transmute()).unwrap(); + let mut cmd_buf_data = cmd_buf.data.lock(); + let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); + + let query_set_guard = hub.query_sets.read(); + + let encoder = &mut cmd_buf_data.encoder; + let status = &mut cmd_buf_data.status; + let tracker = &mut cmd_buf_data.trackers; + + { + let transit = encoder.open().map_pass_err(pass_scope)?; + + fixup_discarded_surfaces( + pending_discard_init_fixups.into_iter(), + transit, + &mut tracker.textures, + &cmd_buf.device, + ); + + cmd_buf_data + .pending_query_resets + .reset_queries( + transit, + &query_set_guard, + cmd_buf.device.info.id().backend(), + ) + .map_err(RenderCommandError::InvalidQuerySet) + .map_pass_err(PassErrorScope::QueryReset)?; + + super::CommandBuffer::insert_barriers_from_scope( + transit, + tracker, + &scope, + &snatch_guard, + ); + } + + *status = CommandEncoderStatus::Recording; + encoder.close_and_swap().map_pass_err(pass_scope)?; + + Ok(()) + } +} + +pub mod render_ffi { + use super::{ + super::{Rect, RenderCommand}, + RenderPass, + }; + use crate::{id, RawString}; + use std::{convert::TryInto, ffi, num::NonZeroU32, slice}; + use wgt::{BufferAddress, BufferSize, Color, DynamicOffset, IndexFormat}; + + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given pointer is + /// valid for `offset_length` elements. 
+ #[no_mangle] + pub unsafe extern "C" fn wgpu_render_pass_set_bind_group( + pass: &mut RenderPass, + index: u32, + bind_group_id: id::BindGroupId, + offsets: *const DynamicOffset, + offset_length: usize, + ) { + let redundant = unsafe { + pass.current_bind_groups.set_and_check_redundant( + bind_group_id, + index, + &mut pass.base.dynamic_offsets, + offsets, + offset_length, + ) + }; + + if redundant { + return; + } + + pass.base.commands.push(RenderCommand::SetBindGroup { + index, + num_dynamic_offsets: offset_length, + bind_group_id, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_set_pipeline( + pass: &mut RenderPass, + pipeline_id: id::RenderPipelineId, + ) { + if pass.current_pipeline.set_and_check_redundant(pipeline_id) { + return; + } + + pass.base + .commands + .push(RenderCommand::SetPipeline(pipeline_id)); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_set_vertex_buffer( + pass: &mut RenderPass, + slot: u32, + buffer_id: id::BufferId, + offset: BufferAddress, + size: Option<BufferSize>, + ) { + pass.base.commands.push(RenderCommand::SetVertexBuffer { + slot, + buffer_id, + offset, + size, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_set_index_buffer( + pass: &mut RenderPass, + buffer: id::BufferId, + index_format: IndexFormat, + offset: BufferAddress, + size: Option<BufferSize>, + ) { + pass.set_index_buffer(buffer, index_format, offset, size); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_set_blend_constant(pass: &mut RenderPass, color: &Color) { + pass.base + .commands + .push(RenderCommand::SetBlendConstant(*color)); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_set_stencil_reference(pass: &mut RenderPass, value: u32) { + pass.base + .commands + .push(RenderCommand::SetStencilReference(value)); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_set_viewport( + pass: &mut RenderPass, + x: f32, + y: f32, + w: f32, + h: f32, + depth_min: f32, + depth_max: f32, + ) { + 
pass.base.commands.push(RenderCommand::SetViewport { + rect: Rect { x, y, w, h }, + depth_min, + depth_max, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_set_scissor_rect( + pass: &mut RenderPass, + x: u32, + y: u32, + w: u32, + h: u32, + ) { + pass.base + .commands + .push(RenderCommand::SetScissor(Rect { x, y, w, h })); + } + + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given pointer is + /// valid for `size_bytes` bytes. + #[no_mangle] + pub unsafe extern "C" fn wgpu_render_pass_set_push_constants( + pass: &mut RenderPass, + stages: wgt::ShaderStages, + offset: u32, + size_bytes: u32, + data: *const u8, + ) { + assert_eq!( + offset & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), + 0, + "Push constant offset must be aligned to 4 bytes." + ); + assert_eq!( + size_bytes & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), + 0, + "Push constant size must be aligned to 4 bytes." + ); + let data_slice = unsafe { slice::from_raw_parts(data, size_bytes as usize) }; + let value_offset = pass.base.push_constant_data.len().try_into().expect( + "Ran out of push constant space. 
Don't set 4gb of push constants per RenderPass.", + ); + + pass.base.push_constant_data.extend( + data_slice + .chunks_exact(wgt::PUSH_CONSTANT_ALIGNMENT as usize) + .map(|arr| u32::from_ne_bytes([arr[0], arr[1], arr[2], arr[3]])), + ); + + pass.base.commands.push(RenderCommand::SetPushConstant { + stages, + offset, + size_bytes, + values_offset: Some(value_offset), + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_draw( + pass: &mut RenderPass, + vertex_count: u32, + instance_count: u32, + first_vertex: u32, + first_instance: u32, + ) { + pass.base.commands.push(RenderCommand::Draw { + vertex_count, + instance_count, + first_vertex, + first_instance, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_draw_indexed( + pass: &mut RenderPass, + index_count: u32, + instance_count: u32, + first_index: u32, + base_vertex: i32, + first_instance: u32, + ) { + pass.base.commands.push(RenderCommand::DrawIndexed { + index_count, + instance_count, + first_index, + base_vertex, + first_instance, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_draw_indirect( + pass: &mut RenderPass, + buffer_id: id::BufferId, + offset: BufferAddress, + ) { + pass.base.commands.push(RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count: None, + indexed: false, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_draw_indexed_indirect( + pass: &mut RenderPass, + buffer_id: id::BufferId, + offset: BufferAddress, + ) { + pass.base.commands.push(RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count: None, + indexed: true, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_multi_draw_indirect( + pass: &mut RenderPass, + buffer_id: id::BufferId, + offset: BufferAddress, + count: u32, + ) { + pass.base.commands.push(RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count: NonZeroU32::new(count), + indexed: false, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_multi_draw_indexed_indirect( 
+ pass: &mut RenderPass, + buffer_id: id::BufferId, + offset: BufferAddress, + count: u32, + ) { + pass.base.commands.push(RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count: NonZeroU32::new(count), + indexed: true, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_multi_draw_indirect_count( + pass: &mut RenderPass, + buffer_id: id::BufferId, + offset: BufferAddress, + count_buffer_id: id::BufferId, + count_buffer_offset: BufferAddress, + max_count: u32, + ) { + pass.base + .commands + .push(RenderCommand::MultiDrawIndirectCount { + buffer_id, + offset, + count_buffer_id, + count_buffer_offset, + max_count, + indexed: false, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_multi_draw_indexed_indirect_count( + pass: &mut RenderPass, + buffer_id: id::BufferId, + offset: BufferAddress, + count_buffer_id: id::BufferId, + count_buffer_offset: BufferAddress, + max_count: u32, + ) { + pass.base + .commands + .push(RenderCommand::MultiDrawIndirectCount { + buffer_id, + offset, + count_buffer_id, + count_buffer_offset, + max_count, + indexed: true, + }); + } + + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given `label` + /// is a valid null-terminated string. + #[no_mangle] + pub unsafe extern "C" fn wgpu_render_pass_push_debug_group( + pass: &mut RenderPass, + label: RawString, + color: u32, + ) { + let bytes = unsafe { ffi::CStr::from_ptr(label) }.to_bytes(); + pass.base.string_data.extend_from_slice(bytes); + + pass.base.commands.push(RenderCommand::PushDebugGroup { + color, + len: bytes.len(), + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_pop_debug_group(pass: &mut RenderPass) { + pass.base.commands.push(RenderCommand::PopDebugGroup); + } + + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given `label` + /// is a valid null-terminated string. 
+ #[no_mangle] + pub unsafe extern "C" fn wgpu_render_pass_insert_debug_marker( + pass: &mut RenderPass, + label: RawString, + color: u32, + ) { + let bytes = unsafe { ffi::CStr::from_ptr(label) }.to_bytes(); + pass.base.string_data.extend_from_slice(bytes); + + pass.base.commands.push(RenderCommand::InsertDebugMarker { + color, + len: bytes.len(), + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_write_timestamp( + pass: &mut RenderPass, + query_set_id: id::QuerySetId, + query_index: u32, + ) { + pass.base.commands.push(RenderCommand::WriteTimestamp { + query_set_id, + query_index, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_begin_occlusion_query( + pass: &mut RenderPass, + query_index: u32, + ) { + pass.base + .commands + .push(RenderCommand::BeginOcclusionQuery { query_index }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_end_occlusion_query(pass: &mut RenderPass) { + pass.base.commands.push(RenderCommand::EndOcclusionQuery); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_begin_pipeline_statistics_query( + pass: &mut RenderPass, + query_set_id: id::QuerySetId, + query_index: u32, + ) { + pass.base + .commands + .push(RenderCommand::BeginPipelineStatisticsQuery { + query_set_id, + query_index, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_end_pipeline_statistics_query(pass: &mut RenderPass) { + pass.base + .commands + .push(RenderCommand::EndPipelineStatisticsQuery); + } + + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given pointer is + /// valid for `render_bundle_ids_length` elements. 
+ #[no_mangle] + pub unsafe extern "C" fn wgpu_render_pass_execute_bundles( + pass: &mut RenderPass, + render_bundle_ids: *const id::RenderBundleId, + render_bundle_ids_length: usize, + ) { + for &bundle_id in + unsafe { slice::from_raw_parts(render_bundle_ids, render_bundle_ids_length) } + { + pass.base + .commands + .push(RenderCommand::ExecuteBundle(bundle_id)); + } + pass.current_pipeline.reset(); + pass.current_bind_groups.reset(); + } +} diff --git a/third_party/rust/wgpu-core/src/command/transfer.rs b/third_party/rust/wgpu-core/src/command/transfer.rs new file mode 100644 index 0000000000..0a952dfc84 --- /dev/null +++ b/third_party/rust/wgpu-core/src/command/transfer.rs @@ -0,0 +1,1229 @@ +#[cfg(feature = "trace")] +use crate::device::trace::Command as TraceCommand; +use crate::{ + api_log, + command::{clear_texture, CommandBuffer, CommandEncoderError}, + conv, + device::{Device, DeviceError, MissingDownlevelFlags}, + error::{ErrorFormatter, PrettyError}, + global::Global, + hal_api::HalApi, + id::{BufferId, CommandEncoderId, DeviceId, TextureId}, + init_tracker::{ + has_copy_partial_init_tracker_coverage, MemoryInitKind, TextureInitRange, + TextureInitTrackerAction, + }, + resource::{Resource, Texture, TextureErrorDimension}, + track::{TextureSelector, Tracker}, +}; + +use arrayvec::ArrayVec; +use hal::CommandEncoder as _; +use thiserror::Error; +use wgt::{BufferAddress, BufferUsages, Extent3d, TextureUsages}; + +use std::{iter, sync::Arc}; + +use super::{memory_init::CommandBufferTextureMemoryActions, ClearError, CommandEncoder}; + +pub type ImageCopyBuffer = wgt::ImageCopyBuffer<BufferId>; +pub type ImageCopyTexture = wgt::ImageCopyTexture<TextureId>; +pub type ImageCopyTextureTagged = wgt::ImageCopyTextureTagged<TextureId>; + +#[derive(Clone, Copy, Debug)] +pub enum CopySide { + Source, + Destination, +} + +/// Error encountered while attempting a data transfer. 
+#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum TransferError { + #[error("Device {0:?} is invalid")] + InvalidDevice(DeviceId), + #[error("Buffer {0:?} is invalid or destroyed")] + InvalidBuffer(BufferId), + #[error("Texture {0:?} is invalid or destroyed")] + InvalidTexture(TextureId), + #[error("Source and destination cannot be the same buffer")] + SameSourceDestinationBuffer, + #[error("Source buffer/texture is missing the `COPY_SRC` usage flag")] + MissingCopySrcUsageFlag, + #[error("Destination buffer/texture is missing the `COPY_DST` usage flag")] + MissingCopyDstUsageFlag(Option<BufferId>, Option<TextureId>), + #[error("Destination texture is missing the `RENDER_ATTACHMENT` usage flag")] + MissingRenderAttachmentUsageFlag(TextureId), + #[error("Copy of {start_offset}..{end_offset} would end up overrunning the bounds of the {side:?} buffer of size {buffer_size}")] + BufferOverrun { + start_offset: BufferAddress, + end_offset: BufferAddress, + buffer_size: BufferAddress, + side: CopySide, + }, + #[error("Copy of {dimension:?} {start_offset}..{end_offset} would end up overrunning the bounds of the {side:?} texture of {dimension:?} size {texture_size}")] + TextureOverrun { + start_offset: u32, + end_offset: u32, + texture_size: u32, + dimension: TextureErrorDimension, + side: CopySide, + }, + #[error("Unable to select texture aspect {aspect:?} from format {format:?}")] + InvalidTextureAspect { + format: wgt::TextureFormat, + aspect: wgt::TextureAspect, + }, + #[error("Unable to select texture mip level {level} out of {total}")] + InvalidTextureMipLevel { level: u32, total: u32 }, + #[error("Texture dimension must be 2D when copying from an external texture")] + InvalidDimensionExternal(TextureId), + #[error("Buffer offset {0} is not aligned to block size or `COPY_BUFFER_ALIGNMENT`")] + UnalignedBufferOffset(BufferAddress), + #[error("Copy size {0} does not respect `COPY_BUFFER_ALIGNMENT`")] + UnalignedCopySize(BufferAddress), + #[error("Copy width 
is not a multiple of block width")] + UnalignedCopyWidth, + #[error("Copy height is not a multiple of block height")] + UnalignedCopyHeight, + #[error("Copy origin's x component is not a multiple of block width")] + UnalignedCopyOriginX, + #[error("Copy origin's y component is not a multiple of block height")] + UnalignedCopyOriginY, + #[error("Bytes per row does not respect `COPY_BYTES_PER_ROW_ALIGNMENT`")] + UnalignedBytesPerRow, + #[error("Number of bytes per row needs to be specified since more than one row is copied")] + UnspecifiedBytesPerRow, + #[error("Number of rows per image needs to be specified since more than one image is copied")] + UnspecifiedRowsPerImage, + #[error("Number of bytes per row is less than the number of bytes in a complete row")] + InvalidBytesPerRow, + #[error("Image is 1D and the copy height and depth are not both set to 1")] + InvalidCopySize, + #[error("Number of rows per image is invalid")] + InvalidRowsPerImage, + #[error("Copy source aspects must refer to all aspects of the source texture format")] + CopySrcMissingAspects, + #[error( + "Copy destination aspects must refer to all aspects of the destination texture format" + )] + CopyDstMissingAspects, + #[error("Copy aspect must refer to a single aspect of texture format")] + CopyAspectNotOne, + #[error("Copying from textures with format {format:?} and aspect {aspect:?} is forbidden")] + CopyFromForbiddenTextureFormat { + format: wgt::TextureFormat, + aspect: wgt::TextureAspect, + }, + #[error("Copying to textures with format {format:?} and aspect {aspect:?} is forbidden")] + CopyToForbiddenTextureFormat { + format: wgt::TextureFormat, + aspect: wgt::TextureAspect, + }, + #[error( + "Copying to textures with format {0:?} is forbidden when copying from external texture" + )] + ExternalCopyToForbiddenTextureFormat(wgt::TextureFormat), + #[error("The entire texture must be copied when copying from depth texture")] + InvalidDepthTextureExtent, + #[error( + "Source format 
({src_format:?}) and destination format ({dst_format:?}) are not copy-compatible (they may only differ in srgb-ness)" + )] + TextureFormatsNotCopyCompatible { + src_format: wgt::TextureFormat, + dst_format: wgt::TextureFormat, + }, + #[error(transparent)] + MemoryInitFailure(#[from] ClearError), + #[error("Cannot encode this copy because of a missing downelevel flag")] + MissingDownlevelFlags(#[from] MissingDownlevelFlags), + #[error("Source texture sample count must be 1, got {sample_count}")] + InvalidSampleCount { sample_count: u32 }, + #[error("Requested mip level {requested} does no exist (count: {count})")] + InvalidMipLevel { requested: u32, count: u32 }, +} + +impl PrettyError for TransferError { + fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { + fmt.error(self); + match *self { + Self::InvalidBuffer(id) => { + fmt.buffer_label(&id); + } + Self::InvalidTexture(id) => { + fmt.texture_label(&id); + } + // Self::MissingCopySrcUsageFlag(buf_opt, tex_opt) => { + // if let Some(buf) = buf_opt { + // let name = crate::gfx_select!(buf => global.buffer_label(buf)); + // ret.push_str(&format_label_line("source", &name)); + // } + // if let Some(tex) = tex_opt { + // let name = crate::gfx_select!(tex => global.texture_label(tex)); + // ret.push_str(&format_label_line("source", &name)); + // } + // } + Self::MissingCopyDstUsageFlag(buf_opt, tex_opt) => { + if let Some(buf) = buf_opt { + fmt.buffer_label_with_key(&buf, "destination"); + } + if let Some(tex) = tex_opt { + fmt.texture_label_with_key(&tex, "destination"); + } + } + _ => {} + }; + } +} +/// Error encountered while attempting to do a copy on a command encoder. 
+#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum CopyError { + #[error(transparent)] + Encoder(#[from] CommandEncoderError), + #[error("Copy error")] + Transfer(#[from] TransferError), +} + +impl From<DeviceError> for CopyError { + fn from(err: DeviceError) -> Self { + CopyError::Encoder(CommandEncoderError::Device(err)) + } +} + +pub(crate) fn extract_texture_selector<A: HalApi>( + copy_texture: &ImageCopyTexture, + copy_size: &Extent3d, + texture: &Texture<A>, +) -> Result<(TextureSelector, hal::TextureCopyBase), TransferError> { + let format = texture.desc.format; + let copy_aspect = hal::FormatAspects::new(format, copy_texture.aspect); + if copy_aspect.is_empty() { + return Err(TransferError::InvalidTextureAspect { + format, + aspect: copy_texture.aspect, + }); + } + + let (layers, origin_z) = match texture.desc.dimension { + wgt::TextureDimension::D1 => (0..1, 0), + wgt::TextureDimension::D2 => ( + copy_texture.origin.z..copy_texture.origin.z + copy_size.depth_or_array_layers, + 0, + ), + wgt::TextureDimension::D3 => (0..1, copy_texture.origin.z), + }; + let base = hal::TextureCopyBase { + origin: wgt::Origin3d { + x: copy_texture.origin.x, + y: copy_texture.origin.y, + z: origin_z, + }, + // this value will be incremented per copied layer + array_layer: layers.start, + mip_level: copy_texture.mip_level, + aspect: copy_aspect, + }; + let selector = TextureSelector { + mips: copy_texture.mip_level..copy_texture.mip_level + 1, + layers, + }; + + Ok((selector, base)) +} + +/// WebGPU's [validating linear texture data][vltd] algorithm. +/// +/// Copied with some modifications from WebGPU standard. +/// +/// If successful, returns a pair `(bytes, stride)`, where: +/// - `bytes` is the number of buffer bytes required for this copy, and +/// - `stride` number of bytes between array layers. 
+/// +/// [vltd]: https://gpuweb.github.io/gpuweb/#abstract-opdef-validating-linear-texture-data +pub(crate) fn validate_linear_texture_data( + layout: &wgt::ImageDataLayout, + format: wgt::TextureFormat, + aspect: wgt::TextureAspect, + buffer_size: BufferAddress, + buffer_side: CopySide, + copy_size: &Extent3d, + need_copy_aligned_rows: bool, +) -> Result<(BufferAddress, BufferAddress), TransferError> { + // Convert all inputs to BufferAddress (u64) to avoid some of the overflow issues + // Note: u64 is not always enough to prevent overflow, especially when multiplying + // something with a potentially large depth value, so it is preferable to validate + // the copy size before calling this function (for example via `validate_texture_copy_range`). + let copy_width = copy_size.width as BufferAddress; + let copy_height = copy_size.height as BufferAddress; + let copy_depth = copy_size.depth_or_array_layers as BufferAddress; + + let offset = layout.offset; + + let block_size = format.block_copy_size(Some(aspect)).unwrap() as BufferAddress; + let (block_width, block_height) = format.block_dimensions(); + let block_width = block_width as BufferAddress; + let block_height = block_height as BufferAddress; + + if copy_width % block_width != 0 { + return Err(TransferError::UnalignedCopyWidth); + } + if copy_height % block_height != 0 { + return Err(TransferError::UnalignedCopyHeight); + } + + let width_in_blocks = copy_width / block_width; + let height_in_blocks = copy_height / block_height; + + let bytes_in_last_row = width_in_blocks * block_size; + + let bytes_per_row = if let Some(bytes_per_row) = layout.bytes_per_row { + let bytes_per_row = bytes_per_row as BufferAddress; + if bytes_per_row < bytes_in_last_row { + return Err(TransferError::InvalidBytesPerRow); + } + bytes_per_row + } else { + if copy_depth > 1 || height_in_blocks > 1 { + return Err(TransferError::UnspecifiedBytesPerRow); + } + 0 + }; + let block_rows_per_image = if let Some(rows_per_image) = 
layout.rows_per_image { + let rows_per_image = rows_per_image as BufferAddress; + if rows_per_image < height_in_blocks { + return Err(TransferError::InvalidRowsPerImage); + } + rows_per_image + } else { + if copy_depth > 1 { + return Err(TransferError::UnspecifiedRowsPerImage); + } + 0 + }; + + if need_copy_aligned_rows { + let bytes_per_row_alignment = wgt::COPY_BYTES_PER_ROW_ALIGNMENT as BufferAddress; + + let mut offset_alignment = block_size; + if format.is_depth_stencil_format() { + offset_alignment = 4 + } + if offset % offset_alignment != 0 { + return Err(TransferError::UnalignedBufferOffset(offset)); + } + + if bytes_per_row % bytes_per_row_alignment != 0 { + return Err(TransferError::UnalignedBytesPerRow); + } + } + + let bytes_per_image = bytes_per_row * block_rows_per_image; + + let required_bytes_in_copy = if copy_depth == 0 { + 0 + } else { + let mut required_bytes_in_copy = bytes_per_image * (copy_depth - 1); + if height_in_blocks > 0 { + required_bytes_in_copy += bytes_per_row * (height_in_blocks - 1) + bytes_in_last_row; + } + required_bytes_in_copy + }; + + if offset + required_bytes_in_copy > buffer_size { + return Err(TransferError::BufferOverrun { + start_offset: offset, + end_offset: offset + required_bytes_in_copy, + buffer_size, + side: buffer_side, + }); + } + + Ok((required_bytes_in_copy, bytes_per_image)) +} + +/// WebGPU's [validating texture copy range][vtcr] algorithm. +/// +/// Copied with minor modifications from WebGPU standard. +/// +/// Returns the HAL copy extent and the layer count. 
+/// +/// [vtcr]: https://gpuweb.github.io/gpuweb/#validating-texture-copy-range +pub(crate) fn validate_texture_copy_range( + texture_copy_view: &ImageCopyTexture, + desc: &wgt::TextureDescriptor<(), Vec<wgt::TextureFormat>>, + texture_side: CopySide, + copy_size: &Extent3d, +) -> Result<(hal::CopyExtent, u32), TransferError> { + let (block_width, block_height) = desc.format.block_dimensions(); + + let extent_virtual = desc.mip_level_size(texture_copy_view.mip_level).ok_or( + TransferError::InvalidTextureMipLevel { + level: texture_copy_view.mip_level, + total: desc.mip_level_count, + }, + )?; + // physical size can be larger than the virtual + let extent = extent_virtual.physical_size(desc.format); + + if desc.format.is_depth_stencil_format() && *copy_size != extent { + return Err(TransferError::InvalidDepthTextureExtent); + } + + /// Return `Ok` if a run `size` texels long starting at `start_offset` falls + /// entirely within `texture_size`. Otherwise, return an appropriate a`Err`. + fn check_dimension( + dimension: TextureErrorDimension, + side: CopySide, + start_offset: u32, + size: u32, + texture_size: u32, + ) -> Result<(), TransferError> { + // Avoid underflow in the subtraction by checking start_offset against + // texture_size first. 
+ if start_offset <= texture_size && size <= texture_size - start_offset { + Ok(()) + } else { + Err(TransferError::TextureOverrun { + start_offset, + end_offset: start_offset.wrapping_add(size), + texture_size, + dimension, + side, + }) + } + } + + check_dimension( + TextureErrorDimension::X, + texture_side, + texture_copy_view.origin.x, + copy_size.width, + extent.width, + )?; + check_dimension( + TextureErrorDimension::Y, + texture_side, + texture_copy_view.origin.y, + copy_size.height, + extent.height, + )?; + check_dimension( + TextureErrorDimension::Z, + texture_side, + texture_copy_view.origin.z, + copy_size.depth_or_array_layers, + extent.depth_or_array_layers, + )?; + + if texture_copy_view.origin.x % block_width != 0 { + return Err(TransferError::UnalignedCopyOriginX); + } + if texture_copy_view.origin.y % block_height != 0 { + return Err(TransferError::UnalignedCopyOriginY); + } + if copy_size.width % block_width != 0 { + return Err(TransferError::UnalignedCopyWidth); + } + if copy_size.height % block_height != 0 { + return Err(TransferError::UnalignedCopyHeight); + } + + let (depth, array_layer_count) = match desc.dimension { + wgt::TextureDimension::D1 => (1, 1), + wgt::TextureDimension::D2 => (1, copy_size.depth_or_array_layers), + wgt::TextureDimension::D3 => (copy_size.depth_or_array_layers, 1), + }; + + let copy_extent = hal::CopyExtent { + width: copy_size.width, + height: copy_size.height, + depth, + }; + Ok((copy_extent, array_layer_count)) +} + +fn handle_texture_init<A: HalApi>( + init_kind: MemoryInitKind, + encoder: &mut CommandEncoder<A>, + trackers: &mut Tracker<A>, + texture_memory_actions: &mut CommandBufferTextureMemoryActions<A>, + device: &Device<A>, + copy_texture: &ImageCopyTexture, + copy_size: &Extent3d, + texture: &Arc<Texture<A>>, +) -> Result<(), ClearError> { + let init_action = TextureInitTrackerAction { + texture: texture.clone(), + range: TextureInitRange { + mip_range: copy_texture.mip_level..copy_texture.mip_level + 1, + 
layer_range: copy_texture.origin.z + ..(copy_texture.origin.z + copy_size.depth_or_array_layers), + }, + kind: init_kind, + }; + + // Register the init action. + let immediate_inits = texture_memory_actions.register_init_action(&{ init_action }); + + // In rare cases we may need to insert an init operation immediately onto the command buffer. + if !immediate_inits.is_empty() { + let cmd_buf_raw = encoder.open()?; + for init in immediate_inits { + clear_texture( + &init.texture, + TextureInitRange { + mip_range: init.mip_level..(init.mip_level + 1), + layer_range: init.layer..(init.layer + 1), + }, + cmd_buf_raw, + &mut trackers.textures, + &device.alignments, + device.zero_buffer.as_ref().unwrap(), + )?; + } + } + + Ok(()) +} + +/// Prepare a transfer's source texture. +/// +/// Ensure the source texture of a transfer is in the right initialization +/// state, and record the state for after the transfer operation. +fn handle_src_texture_init<A: HalApi>( + encoder: &mut CommandEncoder<A>, + trackers: &mut Tracker<A>, + texture_memory_actions: &mut CommandBufferTextureMemoryActions<A>, + device: &Device<A>, + source: &ImageCopyTexture, + copy_size: &Extent3d, + texture: &Arc<Texture<A>>, +) -> Result<(), TransferError> { + handle_texture_init( + MemoryInitKind::NeedsInitializedMemory, + encoder, + trackers, + texture_memory_actions, + device, + source, + copy_size, + texture, + )?; + Ok(()) +} + +/// Prepare a transfer's destination texture. +/// +/// Ensure the destination texture of a transfer is in the right initialization +/// state, and record the state for after the transfer operation. 
+fn handle_dst_texture_init<A: HalApi>( + encoder: &mut CommandEncoder<A>, + trackers: &mut Tracker<A>, + texture_memory_actions: &mut CommandBufferTextureMemoryActions<A>, + device: &Device<A>, + destination: &ImageCopyTexture, + copy_size: &Extent3d, + texture: &Arc<Texture<A>>, +) -> Result<(), TransferError> { + // Attention: If we don't write full texture subresources, we need to a full + // clear first since we don't track subrects. This means that in rare cases + // even a *destination* texture of a transfer may need an immediate texture + // init. + let dst_init_kind = if has_copy_partial_init_tracker_coverage( + copy_size, + destination.mip_level, + &texture.desc, + ) { + MemoryInitKind::NeedsInitializedMemory + } else { + MemoryInitKind::ImplicitlyInitialized + }; + + handle_texture_init( + dst_init_kind, + encoder, + trackers, + texture_memory_actions, + device, + destination, + copy_size, + texture, + )?; + Ok(()) +} + +impl Global { + pub fn command_encoder_copy_buffer_to_buffer<A: HalApi>( + &self, + command_encoder_id: CommandEncoderId, + source: BufferId, + source_offset: BufferAddress, + destination: BufferId, + destination_offset: BufferAddress, + size: BufferAddress, + ) -> Result<(), CopyError> { + profiling::scope!("CommandEncoder::copy_buffer_to_buffer"); + api_log!( + "CommandEncoder::copy_buffer_to_buffer {source:?} -> {destination:?} {size:?}bytes" + ); + + if source == destination { + return Err(TransferError::SameSourceDestinationBuffer.into()); + } + let hub = A::hub(self); + + let cmd_buf = CommandBuffer::get_encoder(hub, command_encoder_id)?; + let mut cmd_buf_data = cmd_buf.data.lock(); + let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); + + let device = &cmd_buf.device; + if !device.is_valid() { + return Err(TransferError::InvalidDevice(cmd_buf.device.as_info().id()).into()); + } + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf_data.commands { + list.push(TraceCommand::CopyBufferToBuffer { + src: source, + 
src_offset: source_offset, + dst: destination, + dst_offset: destination_offset, + size, + }); + } + + let snatch_guard = device.snatchable_lock.read(); + + let (src_buffer, src_pending) = { + let buffer_guard = hub.buffers.read(); + let src_buffer = buffer_guard + .get(source) + .map_err(|_| TransferError::InvalidBuffer(source))?; + cmd_buf_data + .trackers + .buffers + .set_single(src_buffer, hal::BufferUses::COPY_SRC) + .ok_or(TransferError::InvalidBuffer(source))? + }; + let src_raw = src_buffer + .raw + .get(&snatch_guard) + .ok_or(TransferError::InvalidBuffer(source))?; + if !src_buffer.usage.contains(BufferUsages::COPY_SRC) { + return Err(TransferError::MissingCopySrcUsageFlag.into()); + } + // expecting only a single barrier + let src_barrier = src_pending.map(|pending| pending.into_hal(&src_buffer, &snatch_guard)); + + let (dst_buffer, dst_pending) = { + let buffer_guard = hub.buffers.read(); + let dst_buffer = buffer_guard + .get(destination) + .map_err(|_| TransferError::InvalidBuffer(destination))?; + cmd_buf_data + .trackers + .buffers + .set_single(dst_buffer, hal::BufferUses::COPY_DST) + .ok_or(TransferError::InvalidBuffer(destination))? 
+ }; + let dst_raw = dst_buffer + .raw + .get(&snatch_guard) + .ok_or(TransferError::InvalidBuffer(destination))?; + if !dst_buffer.usage.contains(BufferUsages::COPY_DST) { + return Err(TransferError::MissingCopyDstUsageFlag(Some(destination), None).into()); + } + let dst_barrier = dst_pending.map(|pending| pending.into_hal(&dst_buffer, &snatch_guard)); + + if size % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(TransferError::UnalignedCopySize(size).into()); + } + if source_offset % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(TransferError::UnalignedBufferOffset(source_offset).into()); + } + if destination_offset % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(TransferError::UnalignedBufferOffset(destination_offset).into()); + } + if !device + .downlevel + .flags + .contains(wgt::DownlevelFlags::UNRESTRICTED_INDEX_BUFFER) + && (src_buffer.usage.contains(wgt::BufferUsages::INDEX) + || dst_buffer.usage.contains(wgt::BufferUsages::INDEX)) + { + let forbidden_usages = wgt::BufferUsages::VERTEX + | wgt::BufferUsages::UNIFORM + | wgt::BufferUsages::INDIRECT + | wgt::BufferUsages::STORAGE; + if src_buffer.usage.intersects(forbidden_usages) + || dst_buffer.usage.intersects(forbidden_usages) + { + return Err(TransferError::MissingDownlevelFlags(MissingDownlevelFlags( + wgt::DownlevelFlags::UNRESTRICTED_INDEX_BUFFER, + )) + .into()); + } + } + + let source_end_offset = source_offset + size; + let destination_end_offset = destination_offset + size; + if source_end_offset > src_buffer.size { + return Err(TransferError::BufferOverrun { + start_offset: source_offset, + end_offset: source_end_offset, + buffer_size: src_buffer.size, + side: CopySide::Source, + } + .into()); + } + if destination_end_offset > dst_buffer.size { + return Err(TransferError::BufferOverrun { + start_offset: destination_offset, + end_offset: destination_end_offset, + buffer_size: dst_buffer.size, + side: CopySide::Destination, + } + .into()); + } + + if size == 0 { + log::trace!("Ignoring 
copy_buffer_to_buffer of size 0"); + return Ok(()); + } + + // Make sure source is initialized memory and mark dest as initialized. + cmd_buf_data.buffer_memory_init_actions.extend( + dst_buffer.initialization_status.read().create_action( + &dst_buffer, + destination_offset..(destination_offset + size), + MemoryInitKind::ImplicitlyInitialized, + ), + ); + cmd_buf_data.buffer_memory_init_actions.extend( + src_buffer.initialization_status.read().create_action( + &src_buffer, + source_offset..(source_offset + size), + MemoryInitKind::NeedsInitializedMemory, + ), + ); + + let region = hal::BufferCopy { + src_offset: source_offset, + dst_offset: destination_offset, + size: wgt::BufferSize::new(size).unwrap(), + }; + let cmd_buf_raw = cmd_buf_data.encoder.open()?; + unsafe { + cmd_buf_raw.transition_buffers(src_barrier.into_iter().chain(dst_barrier)); + cmd_buf_raw.copy_buffer_to_buffer(src_raw, dst_raw, iter::once(region)); + } + Ok(()) + } + + pub fn command_encoder_copy_buffer_to_texture<A: HalApi>( + &self, + command_encoder_id: CommandEncoderId, + source: &ImageCopyBuffer, + destination: &ImageCopyTexture, + copy_size: &Extent3d, + ) -> Result<(), CopyError> { + profiling::scope!("CommandEncoder::copy_buffer_to_texture"); + api_log!( + "CommandEncoder::copy_buffer_to_texture {:?} -> {:?} {copy_size:?}", + source.buffer, + destination.texture + ); + + let hub = A::hub(self); + + let cmd_buf = CommandBuffer::get_encoder(hub, command_encoder_id)?; + let device = &cmd_buf.device; + if !device.is_valid() { + return Err(TransferError::InvalidDevice(cmd_buf.device.as_info().id()).into()); + } + + let mut cmd_buf_data = cmd_buf.data.lock(); + let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf_data.commands { + list.push(TraceCommand::CopyBufferToTexture { + src: *source, + dst: *destination, + size: *copy_size, + }); + } + + let encoder = &mut cmd_buf_data.encoder; + let tracker = &mut 
cmd_buf_data.trackers; + let buffer_memory_init_actions = &mut cmd_buf_data.buffer_memory_init_actions; + let texture_memory_actions = &mut cmd_buf_data.texture_memory_actions; + + if copy_size.width == 0 || copy_size.height == 0 || copy_size.depth_or_array_layers == 0 { + log::trace!("Ignoring copy_buffer_to_texture of size 0"); + return Ok(()); + } + + let dst_texture = hub + .textures + .get(destination.texture) + .map_err(|_| TransferError::InvalidTexture(destination.texture))?; + + let (hal_copy_size, array_layer_count) = validate_texture_copy_range( + destination, + &dst_texture.desc, + CopySide::Destination, + copy_size, + )?; + + let (dst_range, dst_base) = extract_texture_selector(destination, copy_size, &dst_texture)?; + + // Handle texture init *before* dealing with barrier transitions so we + // have an easier time inserting "immediate-inits" that may be required + // by prior discards in rare cases. + handle_dst_texture_init( + encoder, + tracker, + texture_memory_actions, + device, + destination, + copy_size, + &dst_texture, + )?; + + let snatch_guard = device.snatchable_lock.read(); + + let (src_buffer, src_pending) = { + let buffer_guard = hub.buffers.read(); + let src_buffer = buffer_guard + .get(source.buffer) + .map_err(|_| TransferError::InvalidBuffer(source.buffer))?; + tracker + .buffers + .set_single(src_buffer, hal::BufferUses::COPY_SRC) + .ok_or(TransferError::InvalidBuffer(source.buffer))? 
+ }; + let src_raw = src_buffer + .raw + .get(&snatch_guard) + .ok_or(TransferError::InvalidBuffer(source.buffer))?; + if !src_buffer.usage.contains(BufferUsages::COPY_SRC) { + return Err(TransferError::MissingCopySrcUsageFlag.into()); + } + let src_barrier = src_pending.map(|pending| pending.into_hal(&src_buffer, &snatch_guard)); + + let dst_pending = tracker + .textures + .set_single(&dst_texture, dst_range, hal::TextureUses::COPY_DST) + .ok_or(TransferError::InvalidTexture(destination.texture))?; + let dst_raw = dst_texture + .raw(&snatch_guard) + .ok_or(TransferError::InvalidTexture(destination.texture))?; + if !dst_texture.desc.usage.contains(TextureUsages::COPY_DST) { + return Err( + TransferError::MissingCopyDstUsageFlag(None, Some(destination.texture)).into(), + ); + } + let dst_barrier = dst_pending.map(|pending| pending.into_hal(dst_raw)); + + if !dst_base.aspect.is_one() { + return Err(TransferError::CopyAspectNotOne.into()); + } + + if !conv::is_valid_copy_dst_texture_format(dst_texture.desc.format, destination.aspect) { + return Err(TransferError::CopyToForbiddenTextureFormat { + format: dst_texture.desc.format, + aspect: destination.aspect, + } + .into()); + } + + let (required_buffer_bytes_in_copy, bytes_per_array_layer) = validate_linear_texture_data( + &source.layout, + dst_texture.desc.format, + destination.aspect, + src_buffer.size, + CopySide::Source, + copy_size, + true, + )?; + + if dst_texture.desc.format.is_depth_stencil_format() { + device + .require_downlevel_flags(wgt::DownlevelFlags::DEPTH_TEXTURE_AND_BUFFER_COPIES) + .map_err(TransferError::from)?; + } + + buffer_memory_init_actions.extend(src_buffer.initialization_status.read().create_action( + &src_buffer, + source.layout.offset..(source.layout.offset + required_buffer_bytes_in_copy), + MemoryInitKind::NeedsInitializedMemory, + )); + + let regions = (0..array_layer_count).map(|rel_array_layer| { + let mut texture_base = dst_base.clone(); + texture_base.array_layer += rel_array_layer; 
+ let mut buffer_layout = source.layout; + buffer_layout.offset += rel_array_layer as u64 * bytes_per_array_layer; + hal::BufferTextureCopy { + buffer_layout, + texture_base, + size: hal_copy_size, + } + }); + + let cmd_buf_raw = encoder.open()?; + unsafe { + cmd_buf_raw.transition_textures(dst_barrier.into_iter()); + cmd_buf_raw.transition_buffers(src_barrier.into_iter()); + cmd_buf_raw.copy_buffer_to_texture(src_raw, dst_raw, regions); + } + Ok(()) + } + + pub fn command_encoder_copy_texture_to_buffer<A: HalApi>( + &self, + command_encoder_id: CommandEncoderId, + source: &ImageCopyTexture, + destination: &ImageCopyBuffer, + copy_size: &Extent3d, + ) -> Result<(), CopyError> { + profiling::scope!("CommandEncoder::copy_texture_to_buffer"); + api_log!( + "CommandEncoder::copy_texture_to_buffer {:?} -> {:?} {copy_size:?}", + source.texture, + destination.buffer + ); + + let hub = A::hub(self); + + let cmd_buf = CommandBuffer::get_encoder(hub, command_encoder_id)?; + let device = &cmd_buf.device; + if !device.is_valid() { + return Err(TransferError::InvalidDevice(cmd_buf.device.as_info().id()).into()); + } + + let mut cmd_buf_data = cmd_buf.data.lock(); + let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf_data.commands { + list.push(TraceCommand::CopyTextureToBuffer { + src: *source, + dst: *destination, + size: *copy_size, + }); + } + let encoder = &mut cmd_buf_data.encoder; + let tracker = &mut cmd_buf_data.trackers; + let buffer_memory_init_actions = &mut cmd_buf_data.buffer_memory_init_actions; + let texture_memory_actions = &mut cmd_buf_data.texture_memory_actions; + + if copy_size.width == 0 || copy_size.height == 0 || copy_size.depth_or_array_layers == 0 { + log::trace!("Ignoring copy_texture_to_buffer of size 0"); + return Ok(()); + } + + let src_texture = hub + .textures + .get(source.texture) + .map_err(|_| TransferError::InvalidTexture(source.texture))?; + + let (hal_copy_size, 
array_layer_count) = + validate_texture_copy_range(source, &src_texture.desc, CopySide::Source, copy_size)?; + + let (src_range, src_base) = extract_texture_selector(source, copy_size, &src_texture)?; + + // Handle texture init *before* dealing with barrier transitions so we + // have an easier time inserting "immediate-inits" that may be required + // by prior discards in rare cases. + handle_src_texture_init( + encoder, + tracker, + texture_memory_actions, + device, + source, + copy_size, + &src_texture, + )?; + + let snatch_guard = device.snatchable_lock.read(); + + let src_pending = tracker + .textures + .set_single(&src_texture, src_range, hal::TextureUses::COPY_SRC) + .ok_or(TransferError::InvalidTexture(source.texture))?; + let src_raw = src_texture + .raw(&snatch_guard) + .ok_or(TransferError::InvalidTexture(source.texture))?; + if !src_texture.desc.usage.contains(TextureUsages::COPY_SRC) { + return Err(TransferError::MissingCopySrcUsageFlag.into()); + } + if src_texture.desc.sample_count != 1 { + return Err(TransferError::InvalidSampleCount { + sample_count: src_texture.desc.sample_count, + } + .into()); + } + if source.mip_level >= src_texture.desc.mip_level_count { + return Err(TransferError::InvalidMipLevel { + requested: source.mip_level, + count: src_texture.desc.mip_level_count, + } + .into()); + } + let src_barrier = src_pending.map(|pending| pending.into_hal(src_raw)); + + let (dst_buffer, dst_pending) = { + let buffer_guard = hub.buffers.read(); + let dst_buffer = buffer_guard + .get(destination.buffer) + .map_err(|_| TransferError::InvalidBuffer(destination.buffer))?; + tracker + .buffers + .set_single(dst_buffer, hal::BufferUses::COPY_DST) + .ok_or(TransferError::InvalidBuffer(destination.buffer))? 
+ }; + let dst_raw = dst_buffer + .raw + .get(&snatch_guard) + .ok_or(TransferError::InvalidBuffer(destination.buffer))?; + if !dst_buffer.usage.contains(BufferUsages::COPY_DST) { + return Err( + TransferError::MissingCopyDstUsageFlag(Some(destination.buffer), None).into(), + ); + } + let dst_barrier = dst_pending.map(|pending| pending.into_hal(&dst_buffer, &snatch_guard)); + + if !src_base.aspect.is_one() { + return Err(TransferError::CopyAspectNotOne.into()); + } + + if !conv::is_valid_copy_src_texture_format(src_texture.desc.format, source.aspect) { + return Err(TransferError::CopyFromForbiddenTextureFormat { + format: src_texture.desc.format, + aspect: source.aspect, + } + .into()); + } + + let (required_buffer_bytes_in_copy, bytes_per_array_layer) = validate_linear_texture_data( + &destination.layout, + src_texture.desc.format, + source.aspect, + dst_buffer.size, + CopySide::Destination, + copy_size, + true, + )?; + + if src_texture.desc.format.is_depth_stencil_format() { + device + .require_downlevel_flags(wgt::DownlevelFlags::DEPTH_TEXTURE_AND_BUFFER_COPIES) + .map_err(TransferError::from)?; + } + + buffer_memory_init_actions.extend(dst_buffer.initialization_status.read().create_action( + &dst_buffer, + destination.layout.offset..(destination.layout.offset + required_buffer_bytes_in_copy), + MemoryInitKind::ImplicitlyInitialized, + )); + + let regions = (0..array_layer_count).map(|rel_array_layer| { + let mut texture_base = src_base.clone(); + texture_base.array_layer += rel_array_layer; + let mut buffer_layout = destination.layout; + buffer_layout.offset += rel_array_layer as u64 * bytes_per_array_layer; + hal::BufferTextureCopy { + buffer_layout, + texture_base, + size: hal_copy_size, + } + }); + let cmd_buf_raw = encoder.open()?; + unsafe { + cmd_buf_raw.transition_buffers(dst_barrier.into_iter()); + cmd_buf_raw.transition_textures(src_barrier.into_iter()); + cmd_buf_raw.copy_texture_to_buffer( + src_raw, + hal::TextureUses::COPY_SRC, + dst_raw, + 
regions, + ); + } + Ok(()) + } + + pub fn command_encoder_copy_texture_to_texture<A: HalApi>( + &self, + command_encoder_id: CommandEncoderId, + source: &ImageCopyTexture, + destination: &ImageCopyTexture, + copy_size: &Extent3d, + ) -> Result<(), CopyError> { + profiling::scope!("CommandEncoder::copy_texture_to_texture"); + api_log!( + "CommandEncoder::copy_texture_to_texture {:?} -> {:?} {copy_size:?}", + source.texture, + destination.texture + ); + + let hub = A::hub(self); + + let cmd_buf = CommandBuffer::get_encoder(hub, command_encoder_id)?; + let device = &cmd_buf.device; + if !device.is_valid() { + return Err(TransferError::InvalidDevice(cmd_buf.device.as_info().id()).into()); + } + + let snatch_guard = device.snatchable_lock.read(); + + let mut cmd_buf_data = cmd_buf.data.lock(); + let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf_data.commands { + list.push(TraceCommand::CopyTextureToTexture { + src: *source, + dst: *destination, + size: *copy_size, + }); + } + let encoder = &mut cmd_buf_data.encoder; + let tracker = &mut cmd_buf_data.trackers; + let texture_memory_actions = &mut cmd_buf_data.texture_memory_actions; + + if copy_size.width == 0 || copy_size.height == 0 || copy_size.depth_or_array_layers == 0 { + log::trace!("Ignoring copy_texture_to_texture of size 0"); + return Ok(()); + } + + let src_texture = hub + .textures + .get(source.texture) + .map_err(|_| TransferError::InvalidTexture(source.texture))?; + let dst_texture = hub + .textures + .get(destination.texture) + .map_err(|_| TransferError::InvalidTexture(source.texture))?; + + // src and dst texture format must be copy-compatible + // https://gpuweb.github.io/gpuweb/#copy-compatible + if src_texture.desc.format.remove_srgb_suffix() + != dst_texture.desc.format.remove_srgb_suffix() + { + return Err(TransferError::TextureFormatsNotCopyCompatible { + src_format: src_texture.desc.format, + dst_format: dst_texture.desc.format, 
+ } + .into()); + } + + let (src_copy_size, array_layer_count) = + validate_texture_copy_range(source, &src_texture.desc, CopySide::Source, copy_size)?; + let (dst_copy_size, _) = validate_texture_copy_range( + destination, + &dst_texture.desc, + CopySide::Destination, + copy_size, + )?; + + let (src_range, src_tex_base) = extract_texture_selector(source, copy_size, &src_texture)?; + let (dst_range, dst_tex_base) = + extract_texture_selector(destination, copy_size, &dst_texture)?; + let src_texture_aspects = hal::FormatAspects::from(src_texture.desc.format); + let dst_texture_aspects = hal::FormatAspects::from(dst_texture.desc.format); + if src_tex_base.aspect != src_texture_aspects { + return Err(TransferError::CopySrcMissingAspects.into()); + } + if dst_tex_base.aspect != dst_texture_aspects { + return Err(TransferError::CopyDstMissingAspects.into()); + } + + // Handle texture init *before* dealing with barrier transitions so we + // have an easier time inserting "immediate-inits" that may be required + // by prior discards in rare cases. + handle_src_texture_init( + encoder, + tracker, + texture_memory_actions, + device, + source, + copy_size, + &src_texture, + )?; + handle_dst_texture_init( + encoder, + tracker, + texture_memory_actions, + device, + destination, + copy_size, + &dst_texture, + )?; + + let src_pending = cmd_buf_data + .trackers + .textures + .set_single(&src_texture, src_range, hal::TextureUses::COPY_SRC) + .ok_or(TransferError::InvalidTexture(source.texture))?; + let src_raw = src_texture + .raw(&snatch_guard) + .ok_or(TransferError::InvalidTexture(source.texture))?; + if !src_texture.desc.usage.contains(TextureUsages::COPY_SRC) { + return Err(TransferError::MissingCopySrcUsageFlag.into()); + } + + //TODO: try to avoid this the collection. It's needed because both + // `src_pending` and `dst_pending` try to hold `trackers.textures` mutably. 
+ let mut barriers: ArrayVec<_, 2> = src_pending + .map(|pending| pending.into_hal(src_raw)) + .collect(); + + let dst_pending = cmd_buf_data + .trackers + .textures + .set_single(&dst_texture, dst_range, hal::TextureUses::COPY_DST) + .ok_or(TransferError::InvalidTexture(destination.texture))?; + let dst_raw = dst_texture + .raw(&snatch_guard) + .ok_or(TransferError::InvalidTexture(destination.texture))?; + if !dst_texture.desc.usage.contains(TextureUsages::COPY_DST) { + return Err( + TransferError::MissingCopyDstUsageFlag(None, Some(destination.texture)).into(), + ); + } + + barriers.extend(dst_pending.map(|pending| pending.into_hal(dst_raw))); + + let hal_copy_size = hal::CopyExtent { + width: src_copy_size.width.min(dst_copy_size.width), + height: src_copy_size.height.min(dst_copy_size.height), + depth: src_copy_size.depth.min(dst_copy_size.depth), + }; + let regions = (0..array_layer_count).map(|rel_array_layer| { + let mut src_base = src_tex_base.clone(); + let mut dst_base = dst_tex_base.clone(); + src_base.array_layer += rel_array_layer; + dst_base.array_layer += rel_array_layer; + hal::TextureCopy { + src_base, + dst_base, + size: hal_copy_size, + } + }); + let cmd_buf_raw = cmd_buf_data.encoder.open()?; + unsafe { + cmd_buf_raw.transition_textures(barriers.into_iter()); + cmd_buf_raw.copy_texture_to_texture( + src_raw, + hal::TextureUses::COPY_SRC, + dst_raw, + regions, + ); + } + + Ok(()) + } +} diff --git a/third_party/rust/wgpu-core/src/conv.rs b/third_party/rust/wgpu-core/src/conv.rs new file mode 100644 index 0000000000..0b67ad3cbe --- /dev/null +++ b/third_party/rust/wgpu-core/src/conv.rs @@ -0,0 +1,250 @@ +use wgt::TextureFormatFeatures; + +use crate::resource::{self, TextureDescriptor}; + +pub fn is_power_of_two_u16(val: u16) -> bool { + val != 0 && (val & (val - 1)) == 0 +} + +pub fn is_power_of_two_u32(val: u32) -> bool { + val != 0 && (val & (val - 1)) == 0 +} + +pub fn is_valid_copy_src_texture_format( + format: wgt::TextureFormat, + aspect: 
wgt::TextureAspect, +) -> bool { + use wgt::TextureAspect as Ta; + use wgt::TextureFormat as Tf; + match (format, aspect) { + (Tf::Depth24Plus, _) | (Tf::Depth24PlusStencil8, Ta::DepthOnly) => false, + _ => true, + } +} + +pub fn is_valid_copy_dst_texture_format( + format: wgt::TextureFormat, + aspect: wgt::TextureAspect, +) -> bool { + use wgt::TextureAspect as Ta; + use wgt::TextureFormat as Tf; + match (format, aspect) { + (Tf::Depth24Plus | Tf::Depth32Float, _) + | (Tf::Depth24PlusStencil8 | Tf::Depth32FloatStencil8, Ta::DepthOnly) => false, + _ => true, + } +} + +#[cfg_attr( + any(not(target_arch = "wasm32"), target_os = "emscripten"), + allow(unused) +)] +pub fn is_valid_external_image_copy_dst_texture_format(format: wgt::TextureFormat) -> bool { + use wgt::TextureFormat as Tf; + match format { + Tf::R8Unorm + | Tf::R16Float + | Tf::R32Float + | Tf::Rg8Unorm + | Tf::Rg16Float + | Tf::Rg32Float + | Tf::Rgba8Unorm + | Tf::Rgba8UnormSrgb + | Tf::Bgra8Unorm + | Tf::Bgra8UnormSrgb + | Tf::Rgb10a2Unorm + | Tf::Rgba16Float + | Tf::Rgba32Float => true, + _ => false, + } +} + +pub fn map_buffer_usage(usage: wgt::BufferUsages) -> hal::BufferUses { + let mut u = hal::BufferUses::empty(); + u.set( + hal::BufferUses::MAP_READ, + usage.contains(wgt::BufferUsages::MAP_READ), + ); + u.set( + hal::BufferUses::MAP_WRITE, + usage.contains(wgt::BufferUsages::MAP_WRITE), + ); + u.set( + hal::BufferUses::COPY_SRC, + usage.contains(wgt::BufferUsages::COPY_SRC), + ); + u.set( + hal::BufferUses::COPY_DST, + usage.contains(wgt::BufferUsages::COPY_DST), + ); + u.set( + hal::BufferUses::INDEX, + usage.contains(wgt::BufferUsages::INDEX), + ); + u.set( + hal::BufferUses::VERTEX, + usage.contains(wgt::BufferUsages::VERTEX), + ); + u.set( + hal::BufferUses::UNIFORM, + usage.contains(wgt::BufferUsages::UNIFORM), + ); + u.set( + hal::BufferUses::STORAGE_READ | hal::BufferUses::STORAGE_READ_WRITE, + usage.contains(wgt::BufferUsages::STORAGE), + ); + u.set( + hal::BufferUses::INDIRECT, + 
usage.contains(wgt::BufferUsages::INDIRECT), + ); + u.set( + hal::BufferUses::QUERY_RESOLVE, + usage.contains(wgt::BufferUsages::QUERY_RESOLVE), + ); + u +} + +pub fn map_texture_usage( + usage: wgt::TextureUsages, + aspect: hal::FormatAspects, +) -> hal::TextureUses { + let mut u = hal::TextureUses::empty(); + u.set( + hal::TextureUses::COPY_SRC, + usage.contains(wgt::TextureUsages::COPY_SRC), + ); + u.set( + hal::TextureUses::COPY_DST, + usage.contains(wgt::TextureUsages::COPY_DST), + ); + u.set( + hal::TextureUses::RESOURCE, + usage.contains(wgt::TextureUsages::TEXTURE_BINDING), + ); + u.set( + hal::TextureUses::STORAGE_READ | hal::TextureUses::STORAGE_READ_WRITE, + usage.contains(wgt::TextureUsages::STORAGE_BINDING), + ); + let is_color = aspect.contains(hal::FormatAspects::COLOR); + u.set( + hal::TextureUses::COLOR_TARGET, + usage.contains(wgt::TextureUsages::RENDER_ATTACHMENT) && is_color, + ); + u.set( + hal::TextureUses::DEPTH_STENCIL_READ | hal::TextureUses::DEPTH_STENCIL_WRITE, + usage.contains(wgt::TextureUsages::RENDER_ATTACHMENT) && !is_color, + ); + u +} + +pub fn map_texture_usage_for_texture( + desc: &TextureDescriptor, + format_features: &TextureFormatFeatures, +) -> hal::TextureUses { + // Enforce having COPY_DST/DEPTH_STENCIL_WRITE/COLOR_TARGET otherwise we + // wouldn't be able to initialize the texture. 
+ map_texture_usage(desc.usage, desc.format.into()) + | if desc.format.is_depth_stencil_format() { + hal::TextureUses::DEPTH_STENCIL_WRITE + } else if desc.usage.contains(wgt::TextureUsages::COPY_DST) { + hal::TextureUses::COPY_DST // (set already) + } else { + // Use COPY_DST only if we can't use COLOR_TARGET + if format_features + .allowed_usages + .contains(wgt::TextureUsages::RENDER_ATTACHMENT) + && desc.dimension == wgt::TextureDimension::D2 + // Render targets dimension must be 2d + { + hal::TextureUses::COLOR_TARGET + } else { + hal::TextureUses::COPY_DST + } + } +} + +pub fn map_texture_usage_from_hal(uses: hal::TextureUses) -> wgt::TextureUsages { + let mut u = wgt::TextureUsages::empty(); + u.set( + wgt::TextureUsages::COPY_SRC, + uses.contains(hal::TextureUses::COPY_SRC), + ); + u.set( + wgt::TextureUsages::COPY_DST, + uses.contains(hal::TextureUses::COPY_DST), + ); + u.set( + wgt::TextureUsages::TEXTURE_BINDING, + uses.contains(hal::TextureUses::RESOURCE), + ); + u.set( + wgt::TextureUsages::STORAGE_BINDING, + uses.contains(hal::TextureUses::STORAGE_READ | hal::TextureUses::STORAGE_READ_WRITE), + ); + u.set( + wgt::TextureUsages::RENDER_ATTACHMENT, + uses.contains(hal::TextureUses::COLOR_TARGET), + ); + u +} + +pub fn check_texture_dimension_size( + dimension: wgt::TextureDimension, + wgt::Extent3d { + width, + height, + depth_or_array_layers, + }: wgt::Extent3d, + sample_size: u32, + limits: &wgt::Limits, +) -> Result<(), resource::TextureDimensionError> { + use resource::{TextureDimensionError as Tde, TextureErrorDimension as Ted}; + use wgt::TextureDimension::*; + + let (extent_limits, sample_limit) = match dimension { + D1 => ([limits.max_texture_dimension_1d, 1, 1], 1), + D2 => ( + [ + limits.max_texture_dimension_2d, + limits.max_texture_dimension_2d, + limits.max_texture_array_layers, + ], + 32, + ), + D3 => ( + [ + limits.max_texture_dimension_3d, + limits.max_texture_dimension_3d, + limits.max_texture_dimension_3d, + ], + 1, + ), + }; + + for 
(&dim, (&given, &limit)) in [Ted::X, Ted::Y, Ted::Z].iter().zip(
+        [width, height, depth_or_array_layers]
+            .iter()
+            .zip(extent_limits.iter()),
+    ) {
+        // Each axis must be non-zero and within the backend-reported limit.
+        if given == 0 {
+            return Err(Tde::Zero(dim));
+        }
+        if given > limit {
+            return Err(Tde::LimitExceeded { dim, given, limit });
+        }
+    }
+    // Sample counts must be a power of two within the per-dimension cap
+    // (32 for 2D, 1 for 1D/3D — set above).
+    if sample_size == 0 || sample_size > sample_limit || !is_power_of_two_u32(sample_size) {
+        return Err(Tde::InvalidSampleCount(sample_size));
+    }
+
+    Ok(())
+}
+
+/// Map device features to the HAL bind-group-layout creation flags they enable.
+///
+/// Currently only `PARTIALLY_BOUND` is derived (from
+/// `Features::PARTIALLY_BOUND_BINDING_ARRAY`).
+pub fn bind_group_layout_flags(features: wgt::Features) -> hal::BindGroupLayoutFlags {
+    let mut flags = hal::BindGroupLayoutFlags::empty();
+    flags.set(
+        hal::BindGroupLayoutFlags::PARTIALLY_BOUND,
+        features.contains(wgt::Features::PARTIALLY_BOUND_BINDING_ARRAY),
+    );
+    flags
+}
diff --git a/third_party/rust/wgpu-core/src/device/any_device.rs b/third_party/rust/wgpu-core/src/device/any_device.rs
new file mode 100644
index 0000000000..693155a753
--- /dev/null
+++ b/third_party/rust/wgpu-core/src/device/any_device.rs
@@ -0,0 +1,102 @@
+use wgt::Backend;
+
+use super::Device;
+/// The `AnyDevice` type: a pointer to a `Device<A>` for any backend `A`.
+use crate::hal_api::HalApi;
+
+use std::fmt;
+use std::mem::ManuallyDrop;
+use std::ptr::NonNull;
+use std::sync::Arc;
+
+/// Type-erased "vtable" for [`AnyDevice`]: the backend tag plus the drop glue
+/// recorded when the erased `Arc` was created.
+struct AnyDeviceVtable {
+    // We opportunistically store the backend here, since we know it will be used
+    // with backend selection and it can be stored in static memory.
+    backend: Backend,
+    // Drop glue which knows how to drop the stored data.
+    drop: unsafe fn(*mut ()),
+}
+
+/// A pointer to a `Device<A>`, for any backend `A`.
+///
+/// Any `AnyDevice` is just like an `Arc<Device<A>>`, except that the `A` type
+/// parameter is erased. To access the `Device`, you must downcast to a
+/// particular backend with the \[`downcast_ref`\] or \[`downcast_clone`\]
+/// methods.
+pub struct AnyDevice {
+    // Type-erased pointer obtained from `Arc::into_raw(Arc<Device<A>>)`.
+    data: NonNull<()>,
+    vtable: &'static AnyDeviceVtable,
+}
+
+impl AnyDevice {
+    /// Return an `AnyDevice` that holds an owning `Arc` pointer to `device`.
+    pub fn new<A: HalApi>(device: Arc<Device<A>>) -> AnyDevice {
+        unsafe fn drop_glue<A: HalApi>(ptr: *mut ()) {
+            // Drop the arc this instance is holding.
+            //
+            // SAFETY: `ptr` is the pointer that `new::<A>` produced via
+            // `Arc::into_raw` on an `Arc<Device<A>>`, so it must be
+            // reconstructed with that *same* pointee type. (The previous cast
+            // to `A::Surface` ran the wrong destructor — undefined behavior.)
+            unsafe {
+                _ = Arc::from_raw(ptr.cast::<Device<A>>());
+            }
+        }
+
+        // SAFETY: The pointer returned by Arc::into_raw is guaranteed to be
+        // non-null.
+        let data = unsafe { NonNull::new_unchecked(Arc::into_raw(device).cast_mut()) };
+
+        AnyDevice {
+            data: data.cast(),
+            vtable: &AnyDeviceVtable {
+                backend: A::VARIANT,
+                drop: drop_glue::<A>,
+            },
+        }
+    }
+
+    /// If `self` is an `Arc<Device<A>>`, return a reference to the
+    /// device.
+    pub fn downcast_ref<A: HalApi>(&self) -> Option<&Device<A>> {
+        if self.vtable.backend != A::VARIANT {
+            return None;
+        }
+
+        // SAFETY: The backend tag matched `A::VARIANT`, and `new::<A>` is the
+        // only constructor, so `data` really points at a live `Device<A>`
+        // kept alive by the `Arc` this object owns.
+        Some(unsafe { &*(self.data.as_ptr().cast::<Device<A>>()) })
+    }
+
+    /// If `self` is an `Arc<Device<A>>`, return a clone of that.
+    pub fn downcast_clone<A: HalApi>(&self) -> Option<Arc<Device<A>>> {
+        if self.vtable.backend != A::VARIANT {
+            return None;
+        }
+
+        // We need to prevent the destructor of the arc from running, since it
+        // refers to the instance held by this object. Dropping it would
+        // invalidate this object.
+        //
+        // SAFETY: The backend tag matched `A::VARIANT`, so `data` was created
+        // by `new::<A>` from an `Arc<Device<A>>`; `ManuallyDrop` keeps this
+        // object's refcount untouched.
+        let this =
+            ManuallyDrop::new(unsafe { Arc::from_raw(self.data.as_ptr().cast::<Device<A>>()) });
+
+        // Cloning it increases the reference count, and we return a new arc
+        // instance.
+        Some((*this).clone())
+    }
+}
+
+impl Drop for AnyDevice {
+    fn drop(&mut self) {
+        // Forward to the backend-specific drop glue recorded by `new`; it
+        // releases the type-erased `Arc`.
+        unsafe { (self.vtable.drop)(self.data.as_ptr()) }
+    }
+}
+
+impl fmt::Debug for AnyDevice {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "AnyDevice<{}>", self.vtable.backend)
+    }
+}
+
+// SAFETY: `AnyDevice` owns an `Arc<Device<A>>`. NOTE(review): this assumes
+// `Device<A>` is `Send`/`Sync` whenever the `send_sync` cfg is enabled —
+// confirm against the crate's send_sync gating.
+#[cfg(send_sync)]
+unsafe impl Send for AnyDevice {}
+#[cfg(send_sync)]
+unsafe impl Sync for AnyDevice {}
diff --git a/third_party/rust/wgpu-core/src/device/bgl.rs b/third_party/rust/wgpu-core/src/device/bgl.rs
new file mode 100644
index 0000000000..d606f049a3
--- /dev/null
+++ b/third_party/rust/wgpu-core/src/device/bgl.rs
@@ -0,0 +1,129 @@
+use std::hash::{Hash, Hasher};
+
+use crate::{
+    binding_model::{self},
+    FastIndexMap,
+};
+
+/// Where a given BGL came from.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum Origin {
+    /// The bind group layout was created by the user and is present in the BGL resource pool.
+    Pool,
+    /// The bind group layout was derived and is not present in the BGL resource pool.
+    Derived,
+}
+
+/// A HashMap-like structure that stores a bind group layout's [`wgt::BindGroupLayoutEntry`]s.
+///
+/// It is hashable, so bind group layouts can be deduplicated.
+#[derive(Debug, Default, Clone, Eq)]
+pub struct EntryMap {
+    /// We use an IndexMap here so that we can sort the entries by their binding index,
+    /// guaranteeing that the hash of equivalent layouts will be the same.
+    inner: FastIndexMap<u32, wgt::BindGroupLayoutEntry>,
+    /// We keep track of whether the map is sorted or not, so that we can assert that
+    /// it is sorted, so that PartialEq and Hash will be stable.
+    ///
+    /// We only need it sorted if it is used in a Hash or PartialEq, so we never need
+    /// to actively sort it.
+    sorted: bool,
+}
+
+impl PartialEq for EntryMap {
+    fn eq(&self, other: &Self) -> bool {
+        // Equality is only meaningful on sorted maps — see the `sorted` field.
+        self.assert_sorted();
+        other.assert_sorted();
+
+        self.inner == other.inner
+    }
+}
+
+impl Hash for EntryMap {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        self.assert_sorted();
+
+        // We don't need to hash the keys, since they are just extracted from the values.
+        //
+        // We know this is stable and will match the behavior of PartialEq as we ensure
+        // that the array is sorted.
+        for entry in self.inner.values() {
+            entry.hash(state);
+        }
+    }
+}
+
+impl EntryMap {
+    // Invariant check: Hash/PartialEq must only run on sorted maps, otherwise
+    // equivalent layouts could hash differently and deduplication would break.
+    fn assert_sorted(&self) {
+        assert!(self.sorted);
+    }
+
+    /// Create a new [`EntryMap`] from a slice of [`wgt::BindGroupLayoutEntry`]s.
+    ///
+    /// Errors if there are duplicate bindings or if any binding index is greater than
+    /// the device's limits.
+    pub fn from_entries(
+        device_limits: &wgt::Limits,
+        entries: &[wgt::BindGroupLayoutEntry],
+    ) -> Result<Self, binding_model::CreateBindGroupLayoutError> {
+        let mut inner = FastIndexMap::with_capacity_and_hasher(entries.len(), Default::default());
+        for entry in entries {
+            if entry.binding >= device_limits.max_bindings_per_bind_group {
+                return Err(
+                    binding_model::CreateBindGroupLayoutError::InvalidBindingIndex {
+                        binding: entry.binding,
+                        maximum: device_limits.max_bindings_per_bind_group,
+                    },
+                );
+            }
+            // `insert` returning `Some` means the binding index was already
+            // present — duplicate bindings are rejected.
+            if inner.insert(entry.binding, *entry).is_some() {
+                return Err(binding_model::CreateBindGroupLayoutError::ConflictBinding(
+                    entry.binding,
+                ));
+            }
+        }
+        // Sort by binding index so Hash/PartialEq are stable (see `sorted`).
+        inner.sort_unstable_keys();
+
+        Ok(Self {
+            inner,
+            sorted: true,
+        })
+    }
+
+    /// Get the count of [`wgt::BindGroupLayoutEntry`]s in this map.
+    pub fn len(&self) -> usize {
+        self.inner.len()
+    }
+
+    /// Get the [`wgt::BindGroupLayoutEntry`] for the given binding index.
+    pub fn get(&self, binding: u32) -> Option<&wgt::BindGroupLayoutEntry> {
+        self.inner.get(&binding)
+    }
+
+    /// Iterator over all the binding indices in this map.
+ pub fn indices(&self) -> impl ExactSizeIterator<Item = u32> + '_ { + self.inner.keys().copied() + } + + /// Iterator over all the [`wgt::BindGroupLayoutEntry`]s in this map. + pub fn values(&self) -> impl ExactSizeIterator<Item = &wgt::BindGroupLayoutEntry> + '_ { + self.inner.values() + } + + pub fn iter(&self) -> impl ExactSizeIterator<Item = (&u32, &wgt::BindGroupLayoutEntry)> + '_ { + self.inner.iter() + } + + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + pub fn contains_key(&self, key: u32) -> bool { + self.inner.contains_key(&key) + } + + pub fn entry(&mut self, key: u32) -> indexmap::map::Entry<'_, u32, wgt::BindGroupLayoutEntry> { + self.sorted = false; + self.inner.entry(key) + } +} diff --git a/third_party/rust/wgpu-core/src/device/global.rs b/third_party/rust/wgpu-core/src/device/global.rs new file mode 100644 index 0000000000..64fd6d4de7 --- /dev/null +++ b/third_party/rust/wgpu-core/src/device/global.rs @@ -0,0 +1,2559 @@ +#[cfg(feature = "trace")] +use crate::device::trace; +use crate::{ + api_log, binding_model, command, conv, + device::{ + bgl, life::WaitIdleError, map_buffer, queue, DeviceError, DeviceLostClosure, + DeviceLostReason, HostMap, IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL, + }, + global::Global, + hal_api::HalApi, + id::{self, AdapterId, DeviceId, QueueId, SurfaceId}, + init_tracker::TextureInitTracker, + instance::{self, Adapter, Surface}, + pipeline, present, + resource::{self, BufferAccessResult}, + resource::{BufferAccessError, BufferMapOperation, CreateBufferError, Resource}, + validation::check_buffer_usage, + Label, LabelHelpers as _, +}; + +use arrayvec::ArrayVec; +use hal::Device as _; +use parking_lot::RwLock; + +use wgt::{BufferAddress, TextureFormat}; + +use std::{ + borrow::Cow, + iter, + ops::Range, + ptr, + sync::{atomic::Ordering, Arc}, +}; + +use super::{ImplicitPipelineIds, InvalidDevice, UserClosures}; + +impl Global { + pub fn adapter_is_surface_supported<A: HalApi>( + &self, + adapter_id: 
AdapterId, + surface_id: SurfaceId, + ) -> Result<bool, instance::IsSurfaceSupportedError> { + let hub = A::hub(self); + + let surface_guard = self.surfaces.read(); + let adapter_guard = hub.adapters.read(); + let adapter = adapter_guard + .get(adapter_id) + .map_err(|_| instance::IsSurfaceSupportedError::InvalidAdapter)?; + let surface = surface_guard + .get(surface_id) + .map_err(|_| instance::IsSurfaceSupportedError::InvalidSurface)?; + Ok(adapter.is_surface_supported(surface)) + } + + pub fn surface_get_capabilities<A: HalApi>( + &self, + surface_id: SurfaceId, + adapter_id: AdapterId, + ) -> Result<wgt::SurfaceCapabilities, instance::GetSurfaceSupportError> { + profiling::scope!("Surface::get_capabilities"); + self.fetch_adapter_and_surface::<A, _, _>(surface_id, adapter_id, |adapter, surface| { + let mut hal_caps = surface.get_capabilities(adapter)?; + + hal_caps.formats.sort_by_key(|f| !f.is_srgb()); + + let usages = conv::map_texture_usage_from_hal(hal_caps.usage); + + Ok(wgt::SurfaceCapabilities { + formats: hal_caps.formats, + present_modes: hal_caps.present_modes, + alpha_modes: hal_caps.composite_alpha_modes, + usages, + }) + }) + } + + fn fetch_adapter_and_surface< + A: HalApi, + F: FnOnce(&Adapter<A>, &Surface) -> Result<B, instance::GetSurfaceSupportError>, + B, + >( + &self, + surface_id: SurfaceId, + adapter_id: AdapterId, + get_supported_callback: F, + ) -> Result<B, instance::GetSurfaceSupportError> { + let hub = A::hub(self); + + let surface_guard = self.surfaces.read(); + let adapter_guard = hub.adapters.read(); + let adapter = adapter_guard + .get(adapter_id) + .map_err(|_| instance::GetSurfaceSupportError::InvalidAdapter)?; + let surface = surface_guard + .get(surface_id) + .map_err(|_| instance::GetSurfaceSupportError::InvalidSurface)?; + + get_supported_callback(adapter, surface) + } + + pub fn device_features<A: HalApi>( + &self, + device_id: DeviceId, + ) -> Result<wgt::Features, InvalidDevice> { + let hub = A::hub(self); + + let device = 
hub.devices.get(device_id).map_err(|_| InvalidDevice)?; + if !device.is_valid() { + return Err(InvalidDevice); + } + + Ok(device.features) + } + + pub fn device_limits<A: HalApi>( + &self, + device_id: DeviceId, + ) -> Result<wgt::Limits, InvalidDevice> { + let hub = A::hub(self); + + let device = hub.devices.get(device_id).map_err(|_| InvalidDevice)?; + if !device.is_valid() { + return Err(InvalidDevice); + } + + Ok(device.limits.clone()) + } + + pub fn device_downlevel_properties<A: HalApi>( + &self, + device_id: DeviceId, + ) -> Result<wgt::DownlevelCapabilities, InvalidDevice> { + let hub = A::hub(self); + + let device = hub.devices.get(device_id).map_err(|_| InvalidDevice)?; + if !device.is_valid() { + return Err(InvalidDevice); + } + + Ok(device.downlevel.clone()) + } + + pub fn device_create_buffer<A: HalApi>( + &self, + device_id: DeviceId, + desc: &resource::BufferDescriptor, + id_in: Option<id::BufferId>, + ) -> (id::BufferId, Option<CreateBufferError>) { + profiling::scope!("Device::create_buffer"); + + let hub = A::hub(self); + let fid = hub.buffers.prepare(id_in); + + let mut to_destroy: ArrayVec<resource::Buffer<A>, 2> = ArrayVec::new(); + let error = loop { + let device = match hub.devices.get(device_id) { + Ok(device) => device, + Err(_) => { + break DeviceError::Invalid.into(); + } + }; + if !device.is_valid() { + break DeviceError::Lost.into(); + } + + if desc.usage.is_empty() { + // Per spec, `usage` must not be zero. 
+ break CreateBufferError::InvalidUsage(desc.usage); + } + + #[cfg(feature = "trace")] + if let Some(ref mut trace) = *device.trace.lock() { + let mut desc = desc.clone(); + let mapped_at_creation = std::mem::replace(&mut desc.mapped_at_creation, false); + if mapped_at_creation && !desc.usage.contains(wgt::BufferUsages::MAP_WRITE) { + desc.usage |= wgt::BufferUsages::COPY_DST; + } + trace.add(trace::Action::CreateBuffer(fid.id(), desc)); + } + + let buffer = match device.create_buffer(desc, false) { + Ok(buffer) => buffer, + Err(e) => { + break e; + } + }; + + let buffer_use = if !desc.mapped_at_creation { + hal::BufferUses::empty() + } else if desc.usage.contains(wgt::BufferUsages::MAP_WRITE) { + // buffer is mappable, so we are just doing that at start + let map_size = buffer.size; + let ptr = if map_size == 0 { + std::ptr::NonNull::dangling() + } else { + match map_buffer(device.raw(), &buffer, 0, map_size, HostMap::Write) { + Ok(ptr) => ptr, + Err(e) => { + to_destroy.push(buffer); + break e.into(); + } + } + }; + *buffer.map_state.lock() = resource::BufferMapState::Active { + ptr, + range: 0..map_size, + host: HostMap::Write, + }; + hal::BufferUses::MAP_WRITE + } else { + // buffer needs staging area for initialization only + let stage_desc = wgt::BufferDescriptor { + label: Some(Cow::Borrowed( + "(wgpu internal) initializing unmappable buffer", + )), + size: desc.size, + usage: wgt::BufferUsages::MAP_WRITE | wgt::BufferUsages::COPY_SRC, + mapped_at_creation: false, + }; + let stage = match device.create_buffer(&stage_desc, true) { + Ok(stage) => stage, + Err(e) => { + to_destroy.push(buffer); + break e; + } + }; + + let snatch_guard = device.snatchable_lock.read(); + let stage_raw = stage.raw(&snatch_guard).unwrap(); + let mapping = match unsafe { device.raw().map_buffer(stage_raw, 0..stage.size) } { + Ok(mapping) => mapping, + Err(e) => { + to_destroy.push(buffer); + to_destroy.push(stage); + break CreateBufferError::Device(e.into()); + } + }; + + let 
stage_fid = hub.buffers.request(); + let stage = stage_fid.init(stage); + + assert_eq!(buffer.size % wgt::COPY_BUFFER_ALIGNMENT, 0); + // Zero initialize memory and then mark both staging and buffer as initialized + // (it's guaranteed that this is the case by the time the buffer is usable) + unsafe { ptr::write_bytes(mapping.ptr.as_ptr(), 0, buffer.size as usize) }; + buffer.initialization_status.write().drain(0..buffer.size); + stage.initialization_status.write().drain(0..buffer.size); + + *buffer.map_state.lock() = resource::BufferMapState::Init { + ptr: mapping.ptr, + needs_flush: !mapping.is_coherent, + stage_buffer: stage, + }; + hal::BufferUses::COPY_DST + }; + + let (id, resource) = fid.assign(buffer); + api_log!("Device::create_buffer({desc:?}) -> {id:?}"); + + device + .trackers + .lock() + .buffers + .insert_single(id, resource, buffer_use); + + return (id, None); + }; + + // Error path + + for buffer in to_destroy { + let device = Arc::clone(&buffer.device); + device + .lock_life() + .schedule_resource_destruction(queue::TempResource::Buffer(Arc::new(buffer)), !0); + } + + let id = fid.assign_error(desc.label.borrow_or_default()); + (id, Some(error)) + } + + /// Assign `id_in` an error with the given `label`. + /// + /// Ensure that future attempts to use `id_in` as a buffer ID will propagate + /// the error, following the WebGPU ["contagious invalidity"] style. + /// + /// Firefox uses this function to comply strictly with the WebGPU spec, + /// which requires [`GPUBufferDescriptor`] validation to be generated on the + /// Device timeline and leave the newly created [`GPUBuffer`] invalid. + /// + /// Ideally, we would simply let [`device_create_buffer`] take care of all + /// of this, but some errors must be detected before we can even construct a + /// [`wgpu_types::BufferDescriptor`] to give it. 
For example, the WebGPU API + /// allows a `GPUBufferDescriptor`'s [`usage`] property to be any WebIDL + /// `unsigned long` value, but we can't construct a + /// [`wgpu_types::BufferUsages`] value from values with unassigned bits + /// set. This means we must validate `usage` before we can call + /// `device_create_buffer`. + /// + /// When that validation fails, we must arrange for the buffer id to be + /// considered invalid. This method provides the means to do so. + /// + /// ["contagious invalidity"]: https://www.w3.org/TR/webgpu/#invalidity + /// [`GPUBufferDescriptor`]: https://www.w3.org/TR/webgpu/#dictdef-gpubufferdescriptor + /// [`GPUBuffer`]: https://www.w3.org/TR/webgpu/#gpubuffer + /// [`wgpu_types::BufferDescriptor`]: wgt::BufferDescriptor + /// [`device_create_buffer`]: Global::device_create_buffer + /// [`usage`]: https://www.w3.org/TR/webgpu/#dom-gputexturedescriptor-usage + /// [`wgpu_types::BufferUsages`]: wgt::BufferUsages + pub fn create_buffer_error<A: HalApi>(&self, id_in: Option<id::BufferId>, label: Label) { + let hub = A::hub(self); + let fid = hub.buffers.prepare(id_in); + + fid.assign_error(label.borrow_or_default()); + } + + pub fn create_render_bundle_error<A: HalApi>( + &self, + id_in: Option<id::RenderBundleId>, + label: Label, + ) { + let hub = A::hub(self); + let fid = hub.render_bundles.prepare(id_in); + + fid.assign_error(label.borrow_or_default()); + } + + /// Assign `id_in` an error with the given `label`. + /// + /// See `create_buffer_error` for more context and explanation. 
    /// Register an error placeholder under `id_in` so later lookups of that
    /// texture id resolve to an error entry carrying `label`.
    pub fn create_texture_error<A: HalApi>(&self, id_in: Option<id::TextureId>, label: Label) {
        let hub = A::hub(self);
        let fid = hub.textures.prepare(id_in);

        fid.assign_error(label.borrow_or_default());
    }

    /// Block until the last queue submission that used `buffer_id` has
    /// completed on `device_id`. An unknown buffer id is treated as "nothing
    /// to wait for" and returns `Ok(())`.
    #[cfg(feature = "replay")]
    pub fn device_wait_for_buffer<A: HalApi>(
        &self,
        device_id: DeviceId,
        buffer_id: id::BufferId,
    ) -> Result<(), WaitIdleError> {
        let hub = A::hub(self);

        // Read the buffer's last-use submission index under the registry lock,
        // then release the lock before waiting on the device.
        let last_submission = {
            let buffer_guard = hub.buffers.write();
            match buffer_guard.get(buffer_id) {
                Ok(buffer) => buffer.info.submission_index(),
                Err(_) => return Ok(()),
            }
        };

        hub.devices
            .get(device_id)
            .map_err(|_| DeviceError::Invalid)?
            .wait_for_submit(last_submission)
    }

    /// Synchronously write `data` into `buffer_id` starting at `offset` by
    /// mapping the buffer, copying, flushing (if the mapping is not coherent),
    /// and unmapping. Requires the buffer to have `MAP_WRITE` usage.
    #[doc(hidden)]
    pub fn device_set_buffer_sub_data<A: HalApi>(
        &self,
        device_id: DeviceId,
        buffer_id: id::BufferId,
        offset: BufferAddress,
        data: &[u8],
    ) -> BufferAccessResult {
        profiling::scope!("Device::set_buffer_sub_data");

        let hub = A::hub(self);

        let device = hub
            .devices
            .get(device_id)
            .map_err(|_| DeviceError::Invalid)?;
        // Take the snatch lock before touching the raw buffer so it cannot be
        // destroyed out from under us while we hold the guard.
        let snatch_guard = device.snatchable_lock.read();
        if !device.is_valid() {
            return Err(DeviceError::Lost.into());
        }

        let buffer = hub
            .buffers
            .get(buffer_id)
            .map_err(|_| BufferAccessError::Invalid)?;
        check_buffer_usage(buffer.usage, wgt::BufferUsages::MAP_WRITE)?;
        //assert!(buffer isn't used by the GPU);

        #[cfg(feature = "trace")]
        if let Some(ref mut trace) = *device.trace.lock() {
            let data_path = trace.make_binary("bin", data);
            trace.add(trace::Action::WriteBuffer {
                id: buffer_id,
                data: data_path,
                range: offset..offset + data.len() as BufferAddress,
                queued: false,
            });
        }

        let raw_buf = buffer
            .raw(&snatch_guard)
            .ok_or(BufferAccessError::Destroyed)?;
        // SAFETY: raw HAL mapping; the snatch guard keeps `raw_buf` alive and
        // the copy stays within the mapped `offset..offset + data.len()` range.
        unsafe {
            let mapping = device
                .raw()
                .map_buffer(raw_buf, offset..offset + data.len() as u64)
                .map_err(DeviceError::from)?;
            ptr::copy_nonoverlapping(data.as_ptr(), mapping.ptr.as_ptr(), data.len());
            if !mapping.is_coherent {
                device
                    .raw()
                    .flush_mapped_ranges(raw_buf, iter::once(offset..offset + data.len() as u64));
            }
            device
                .raw()
                .unmap_buffer(raw_buf)
                .map_err(DeviceError::from)?;
        }

        Ok(())
    }

    /// Synchronously read `data.len()` bytes from `buffer_id` starting at
    /// `offset` by mapping the buffer, invalidating (if not coherent),
    /// copying out, and unmapping. Requires `MAP_READ` usage.
    #[doc(hidden)]
    pub fn device_get_buffer_sub_data<A: HalApi>(
        &self,
        device_id: DeviceId,
        buffer_id: id::BufferId,
        offset: BufferAddress,
        data: &mut [u8],
    ) -> BufferAccessResult {
        profiling::scope!("Device::get_buffer_sub_data");

        let hub = A::hub(self);

        let device = hub
            .devices
            .get(device_id)
            .map_err(|_| DeviceError::Invalid)?;
        if !device.is_valid() {
            return Err(DeviceError::Lost.into());
        }

        let snatch_guard = device.snatchable_lock.read();

        let buffer = hub
            .buffers
            .get(buffer_id)
            .map_err(|_| BufferAccessError::Invalid)?;
        check_buffer_usage(buffer.usage, wgt::BufferUsages::MAP_READ)?;
        //assert!(buffer isn't used by the GPU);

        let raw_buf = buffer
            .raw(&snatch_guard)
            .ok_or(BufferAccessError::Destroyed)?;
        // SAFETY: mirror of the write path above — invalidate before reading
        // so non-coherent mappings observe the device's writes.
        unsafe {
            let mapping = device
                .raw()
                .map_buffer(raw_buf, offset..offset + data.len() as u64)
                .map_err(DeviceError::from)?;
            if !mapping.is_coherent {
                device.raw().invalidate_mapped_ranges(
                    raw_buf,
                    iter::once(offset..offset + data.len() as u64),
                );
            }
            ptr::copy_nonoverlapping(mapping.ptr.as_ptr(), data.as_mut_ptr(), data.len());
            device
                .raw()
                .unmap_buffer(raw_buf)
                .map_err(DeviceError::from)?;
        }

        Ok(())
    }

    /// Resolve the debug label recorded for `id`.
    pub fn buffer_label<A: HalApi>(&self, id: id::BufferId) -> String {
        A::hub(self).buffers.label_for_resource(id)
    }

    /// Destroy the buffer's underlying GPU allocation without removing its id.
    /// Unmaps first; the unmap result is deliberately ignored (best-effort).
    pub fn buffer_destroy<A: HalApi>(
        &self,
        buffer_id: id::BufferId,
    ) -> Result<(), resource::DestroyError> {
        profiling::scope!("Buffer::destroy");
        api_log!("Buffer::destroy {buffer_id:?}");

        let hub = A::hub(self);

        let buffer = hub
            .buffers
            .get(buffer_id)
            .map_err(|_| resource::DestroyError::Invalid)?;

        let _ = buffer.unmap();

        buffer.destroy()
    }

    /// Drop the user's handle to `buffer_id`, deferring actual reclamation to
    /// the device's life tracker; optionally block until the GPU is done with it.
    pub fn buffer_drop<A: HalApi>(&self,
buffer_id: id::BufferId, wait: bool) {
        profiling::scope!("Buffer::drop");
        api_log!("Buffer::drop {buffer_id:?}");

        let hub = A::hub(self);

        let buffer = match hub.buffers.unregister(buffer_id) {
            Some(buffer) => buffer,
            None => {
                return;
            }
        };

        // Best-effort unmap; errors are ignored since the buffer is going away.
        let _ = buffer.unmap();

        let last_submit_index = buffer.info.submission_index();

        let device = buffer.device.clone();

        // If the buffer is still a destination of a pending (unsubmitted)
        // queue write, it must survive until that write is submitted; park it
        // in `future_suspected_buffers` instead of the regular suspect list.
        if device
            .pending_writes
            .lock()
            .as_ref()
            .unwrap()
            .dst_buffers
            .contains_key(&buffer_id)
        {
            device.lock_life().future_suspected_buffers.push(buffer);
        } else {
            device
                .lock_life()
                .suspected_resources
                .buffers
                .insert(buffer_id, buffer);
        }

        if wait {
            match device.wait_for_submit(last_submit_index) {
                Ok(()) => (),
                Err(e) => log::error!("Failed to wait for buffer {:?}: {}", buffer_id, e),
            }
        }
    }

    /// Create a texture on `device_id` described by `desc`. On failure the id
    /// is registered as an error entry and the error is returned alongside it.
    pub fn device_create_texture<A: HalApi>(
        &self,
        device_id: DeviceId,
        desc: &resource::TextureDescriptor,
        id_in: Option<id::TextureId>,
    ) -> (id::TextureId, Option<resource::CreateTextureError>) {
        profiling::scope!("Device::create_texture");

        let hub = A::hub(self);

        let fid = hub.textures.prepare(id_in);

        // `loop` is used as a labeled error channel: `break err` falls through
        // to the error path below, the success path `return`s early.
        let error = loop {
            let device = match hub.devices.get(device_id) {
                Ok(device) => device,
                Err(_) => break DeviceError::Invalid.into(),
            };
            if !device.is_valid() {
                break DeviceError::Lost.into();
            }
            #[cfg(feature = "trace")]
            if let Some(ref mut trace) = *device.trace.lock() {
                trace.add(trace::Action::CreateTexture(fid.id(), desc.clone()));
            }

            let texture = match device.create_texture(&device.adapter, desc) {
                Ok(texture) => texture,
                Err(error) => break error,
            };

            let (id, resource) = fid.assign(texture);
            api_log!("Device::create_texture({desc:?}) -> {id:?}");

            device.trackers.lock().textures.insert_single(
                id,
                resource,
                hal::TextureUses::UNINITIALIZED,
            );

            return (id, None);
        };

        log::error!("Device::create_texture error: {error}");

        let id = fid.assign_error(desc.label.borrow_or_default());
        (id, Some(error))
    }

    /// Wrap an externally-created HAL texture in a wgpu-core texture id.
    ///
    /// # Safety
    ///
    /// - `hal_texture` must be created from `device_id` corresponding raw handle.
    /// - `hal_texture` must be created respecting `desc`
    /// - `hal_texture` must be initialized
    pub unsafe fn create_texture_from_hal<A: HalApi>(
        &self,
        hal_texture: A::Texture,
        device_id: DeviceId,
        desc: &resource::TextureDescriptor,
        id_in: Option<id::TextureId>,
    ) -> (id::TextureId, Option<resource::CreateTextureError>) {
        profiling::scope!("Device::create_texture_from_hal");

        let hub = A::hub(self);

        let fid = hub.textures.prepare(id_in);

        let error = loop {
            let device = match hub.devices.get(device_id) {
                Ok(device) => device,
                Err(_) => break DeviceError::Invalid.into(),
            };
            if !device.is_valid() {
                break DeviceError::Lost.into();
            }

            // NB: Any change done through the raw texture handle will not be
            // recorded in the replay
            #[cfg(feature = "trace")]
            if let Some(ref mut trace) = *device.trace.lock() {
                trace.add(trace::Action::CreateTexture(fid.id(), desc.clone()));
            }

            let format_features = match device
                .describe_format_features(&device.adapter, desc.format)
                .map_err(|error| resource::CreateTextureError::MissingFeatures(desc.format, error))
            {
                Ok(features) => features,
                Err(error) => break error,
            };

            let mut texture = device.create_texture_from_hal(
                hal_texture,
                conv::map_texture_usage(desc.usage, desc.format.into()),
                desc,
                format_features,
                resource::TextureClearMode::None,
            );
            if desc.usage.contains(wgt::TextureUsages::COPY_DST) {
                texture.hal_usage |= hal::TextureUses::COPY_DST;
            }

            // The caller guarantees the texture is initialized, so start the
            // init tracker with zero uninitialized mips.
            texture.initialization_status =
                RwLock::new(TextureInitTracker::new(desc.mip_level_count, 0));

            let (id, resource) = fid.assign(texture);
            api_log!("Device::create_texture({desc:?}) -> {id:?}");

            device.trackers.lock().textures.insert_single(
                id,
                resource,
                hal::TextureUses::UNINITIALIZED,
            );

            return (id, None);
        };

        log::error!("Device::create_texture error: {error}");

        let id = fid.assign_error(desc.label.borrow_or_default());
        (id, Some(error))
    }

    /// Wrap an externally-created HAL buffer in a wgpu-core buffer id.
    ///
    /// # Safety
    ///
    /// - `hal_buffer` must be created from `device_id` corresponding raw handle.
    /// - `hal_buffer` must be created respecting `desc`
    /// - `hal_buffer` must be initialized
    pub unsafe fn create_buffer_from_hal<A: HalApi>(
        &self,
        hal_buffer: A::Buffer,
        device_id: DeviceId,
        desc: &resource::BufferDescriptor,
        id_in: Option<id::BufferId>,
    ) -> (id::BufferId, Option<CreateBufferError>) {
        profiling::scope!("Device::create_buffer");

        let hub = A::hub(self);
        let fid = hub.buffers.prepare(id_in);

        let error = loop {
            let device = match hub.devices.get(device_id) {
                Ok(device) => device,
                Err(_) => break DeviceError::Invalid.into(),
            };
            if !device.is_valid() {
                break DeviceError::Lost.into();
            }

            // NB: Any change done through the raw buffer handle will not be
            // recorded in the replay
            #[cfg(feature = "trace")]
            if let Some(trace) = device.trace.lock().as_mut() {
                trace.add(trace::Action::CreateBuffer(fid.id(), desc.clone()));
            }

            let buffer = device.create_buffer_from_hal(hal_buffer, desc);

            let (id, buffer) = fid.assign(buffer);
            api_log!("Device::create_buffer -> {id:?}");

            device
                .trackers
                .lock()
                .buffers
                .insert_single(id, buffer, hal::BufferUses::empty());

            return (id, None);
        };

        log::error!("Device::create_buffer error: {error}");

        let id = fid.assign_error(desc.label.borrow_or_default());
        (id, Some(error))
    }

    /// Resolve the debug label recorded for `id`.
    pub fn texture_label<A: HalApi>(&self, id: id::TextureId) -> String {
        A::hub(self).textures.label_for_resource(id)
    }

    /// Destroy the texture's underlying GPU allocation without removing its id.
    pub fn texture_destroy<A: HalApi>(
        &self,
        texture_id: id::TextureId,
    ) -> Result<(), resource::DestroyError> {
        profiling::scope!("Texture::destroy");
        api_log!("Texture::destroy {texture_id:?}");

        let hub = A::hub(self);

        let texture = hub
            .textures
            .get(texture_id)
            .map_err(|_| resource::DestroyError::Invalid)?;

        texture.destroy()
    }

    /// Drop the user's handle to `texture_id`, deferring reclamation to the
    /// device's life tracker; optionally block until the GPU is done with it.
    pub fn texture_drop<A: HalApi>(&self, texture_id: id::TextureId, wait: bool) {
        profiling::scope!("Texture::drop");
        api_log!("Texture::drop {texture_id:?}");

        let hub = A::hub(self);

        if let Some(texture) = hub.textures.unregister(texture_id) {
            let last_submit_index = texture.info.submission_index();

            let device = &texture.device;
            {
                // Same scheme as `buffer_drop`: textures still targeted by a
                // pending queue write go to the "future suspected" list.
                if device
                    .pending_writes
                    .lock()
                    .as_ref()
                    .unwrap()
                    .dst_textures
                    .contains_key(&texture_id)
                {
                    device
                        .lock_life()
                        .future_suspected_textures
                        .push(texture.clone());
                } else {
                    device
                        .lock_life()
                        .suspected_resources
                        .textures
                        .insert(texture_id, texture.clone());
                }
            }

            if wait {
                match device.wait_for_submit(last_submit_index) {
                    Ok(()) => (),
                    Err(e) => log::error!("Failed to wait for texture {texture_id:?}: {e}"),
                }
            }
        }
    }

    /// Create a view of `texture_id` described by `desc`. The view is also
    /// recorded (as a weak reference) on the parent texture's view list.
    #[allow(unused_unsafe)]
    pub fn texture_create_view<A: HalApi>(
        &self,
        texture_id: id::TextureId,
        desc: &resource::TextureViewDescriptor,
        id_in: Option<id::TextureViewId>,
    ) -> (id::TextureViewId, Option<resource::CreateTextureViewError>) {
        profiling::scope!("Texture::create_view");

        let hub = A::hub(self);

        let fid = hub.texture_views.prepare(id_in);

        let error = loop {
            let texture = match hub.textures.get(texture_id) {
                Ok(texture) => texture,
                Err(_) => break resource::CreateTextureViewError::InvalidTexture,
            };
            let device = &texture.device;
            {
                // Scoped so the snatch guard is released before creating the
                // view below.
                let snatch_guard = device.snatchable_lock.read();
                if texture.is_destroyed(&snatch_guard) {
                    break resource::CreateTextureViewError::InvalidTexture;
                }
            }
            #[cfg(feature = "trace")]
            if let Some(ref mut trace) = *device.trace.lock() {
                trace.add(trace::Action::CreateTextureView {
                    id: fid.id(),
                    parent_id: texture_id,
                    desc: desc.clone(),
                });
            }

            let view = match unsafe { device.create_texture_view(&texture, desc) } {
                Ok(view) => view,
                Err(e) => break e,
            };

            let (id, resource) = fid.assign(view);

            {
                let mut views = texture.views.lock();
                views.push(Arc::downgrade(&resource));
            }

            api_log!("Texture::create_view({texture_id:?}) -> {id:?}");
            device.trackers.lock().views.insert_single(id, resource);
            return (id, None);
        };

        log::error!("Texture::create_view({texture_id:?}) error: {error}");
        let id = fid.assign_error(desc.label.borrow_or_default());
        (id, Some(error))
    }

    /// Resolve the debug label recorded for `id`.
    pub fn texture_view_label<A: HalApi>(&self, id: id::TextureViewId) -> String {
        A::hub(self).texture_views.label_for_resource(id)
    }

    /// Drop the user's handle to `texture_view_id`; optionally wait for the
    /// last submission that used it. Currently always returns `Ok(())`.
    pub fn texture_view_drop<A: HalApi>(
        &self,
        texture_view_id: id::TextureViewId,
        wait: bool,
    ) -> Result<(), resource::TextureViewDestroyError> {
        profiling::scope!("TextureView::drop");
        api_log!("TextureView::drop {texture_view_id:?}");

        let hub = A::hub(self);

        if let Some(view) = hub.texture_views.unregister(texture_view_id) {
            let last_submit_index = view.info.submission_index();

            view.device
                .lock_life()
                .suspected_resources
                .texture_views
                .insert(texture_view_id, view.clone());

            if wait {
                match view.device.wait_for_submit(last_submit_index) {
                    Ok(()) => (),
                    Err(e) => {
                        log::error!("Failed to wait for texture view {texture_view_id:?}: {e}")
                    }
                }
            }
        }
        Ok(())
    }

    /// Create a sampler on `device_id` described by `desc`.
    pub fn device_create_sampler<A: HalApi>(
        &self,
        device_id: DeviceId,
        desc: &resource::SamplerDescriptor,
        id_in: Option<id::SamplerId>,
    ) -> (id::SamplerId, Option<resource::CreateSamplerError>) {
        profiling::scope!("Device::create_sampler");

        let hub = A::hub(self);
        let fid = hub.samplers.prepare(id_in);

        let error = loop {
            let device = match hub.devices.get(device_id) {
                Ok(device) => device,
                Err(_) => break DeviceError::Invalid.into(),
            };
            if !device.is_valid() {
                break DeviceError::Lost.into();
            }

            #[cfg(feature = "trace")]
            if let Some(ref mut trace) = *device.trace.lock() {
                trace.add(trace::Action::CreateSampler(fid.id(), desc.clone()));
            }

            let sampler = match device.create_sampler(desc) {
                Ok(sampler) => sampler,
                Err(e) => break e,
            };

            let (id, resource) = fid.assign(sampler);
            api_log!("Device::create_sampler -> {id:?}");
            device.trackers.lock().samplers.insert_single(id, resource);

            return (id, None);
        };

        let id = fid.assign_error(desc.label.borrow_or_default());
        (id, Some(error))
    }

    /// Resolve the debug label recorded for `id`.
    pub fn sampler_label<A: HalApi>(&self, id: id::SamplerId) -> String {
        A::hub(self).samplers.label_for_resource(id)
    }

    /// Drop the user's handle to `sampler_id`, deferring reclamation to the
    /// owning device's life tracker.
    pub fn sampler_drop<A: HalApi>(&self, sampler_id: id::SamplerId) {
        profiling::scope!("Sampler::drop");
        api_log!("Sampler::drop {sampler_id:?}");

        let hub = A::hub(self);

        if let Some(sampler) = hub.samplers.unregister(sampler_id) {
            sampler
                .device
                .lock_life()
                .suspected_resources
                .samplers
                .insert(sampler_id, sampler.clone());
        }
    }

    /// Create (or deduplicate via the device's BGL pool) a bind group layout.
    pub fn device_create_bind_group_layout<A: HalApi>(
        &self,
        device_id: DeviceId,
        desc: &binding_model::BindGroupLayoutDescriptor,
        id_in: Option<id::BindGroupLayoutId>,
    ) -> (
        id::BindGroupLayoutId,
        Option<binding_model::CreateBindGroupLayoutError>,
    ) {
        profiling::scope!("Device::create_bind_group_layout");

        let hub = A::hub(self);
        let fid = hub.bind_group_layouts.prepare(id_in);

        let error = loop {
            let device = match hub.devices.get(device_id) {
                Ok(device) => device,
                Err(_) => break DeviceError::Invalid.into(),
            };
            if !device.is_valid() {
                break DeviceError::Lost.into();
            }

            #[cfg(feature = "trace")]
            if let Some(ref mut trace) = *device.trace.lock() {
                trace.add(trace::Action::CreateBindGroupLayout(fid.id(), desc.clone()));
            }

            let entry_map = match bgl::EntryMap::from_entries(&device.limits, &desc.entries) {
                Ok(map) => map,
                Err(e) => break e,
            };

            // Currently we make a distinction between fid.assign and fid.assign_existing. This distinction is incorrect,
            // but see https://github.com/gfx-rs/wgpu/issues/4912.
            //
            // `assign` also registers the ID with the resource info, so it can be automatically reclaimed. This needs to
            // happen with a mutable reference, which means it can only happen on creation.
            //
            // Because we need to call `assign` inside the closure (to get mut access), we need to "move" the future id into the closure.
            // Rust cannot figure out at compile time that we only ever consume the ID once, so we need to move the check
            // to runtime using an Option.
            let mut fid = Some(fid);

            // The closure might get called, and it might give us an ID. Side channel it out of the closure.
            let mut id = None;

            let bgl_result = device.bgl_pool.get_or_init(entry_map, |entry_map| {
                let bgl =
                    device.create_bind_group_layout(&desc.label, entry_map, bgl::Origin::Pool)?;

                let (id_inner, arc) = fid.take().unwrap().assign(bgl);
                id = Some(id_inner);

                Ok(arc)
            });

            let layout = match bgl_result {
                Ok(layout) => layout,
                Err(e) => break e,
            };

            // If the ID was not assigned, and we survived the above check,
            // it means that the bind group layout already existed and we need to call `assign_existing`.
            //
            // Calling this function _will_ leak the ID. See https://github.com/gfx-rs/wgpu/issues/4912.
            if id.is_none() {
                id = Some(fid.take().unwrap().assign_existing(&layout))
            }

            api_log!("Device::create_bind_group_layout -> {id:?}");
            return (id.unwrap(), None);
        };

        // The original `fid` may have been consumed inside the loop, so
        // prepare a fresh one for the error assignment.
        let fid = hub.bind_group_layouts.prepare(id_in);
        let id = fid.assign_error(desc.label.borrow_or_default());
        (id, Some(error))
    }

    /// Resolve the debug label recorded for `id`.
    pub fn bind_group_layout_label<A: HalApi>(&self, id: id::BindGroupLayoutId) -> String {
        A::hub(self).bind_group_layouts.label_for_resource(id)
    }

    /// Drop the user's handle to `bind_group_layout_id`, deferring reclamation
    /// to the owning device's life tracker.
    pub fn bind_group_layout_drop<A: HalApi>(&self, bind_group_layout_id: id::BindGroupLayoutId) {
        profiling::scope!("BindGroupLayout::drop");
        api_log!("BindGroupLayout::drop {bind_group_layout_id:?}");

        let hub = A::hub(self);

        if let Some(layout) = hub.bind_group_layouts.unregister(bind_group_layout_id) {
            layout
                .device
                .lock_life()
                .suspected_resources
                .bind_group_layouts
                .insert(bind_group_layout_id, layout.clone());
        }
    }

    /// Create a pipeline layout on `device_id` described by `desc`.
    pub fn device_create_pipeline_layout<A: HalApi>(
        &self,
        device_id: DeviceId,
        desc: &binding_model::PipelineLayoutDescriptor,
        id_in: Option<id::PipelineLayoutId>,
    ) -> (
        id::PipelineLayoutId,
        Option<binding_model::CreatePipelineLayoutError>,
    ) {
        profiling::scope!("Device::create_pipeline_layout");

        let hub = A::hub(self);
        let fid = hub.pipeline_layouts.prepare(id_in);

        let error = loop {
            let device = match hub.devices.get(device_id) {
                Ok(device) => device,
                Err(_) => break DeviceError::Invalid.into(),
            };
            if !device.is_valid() {
                break DeviceError::Lost.into();
            }

            #[cfg(feature = "trace")]
            if let Some(ref mut trace) = *device.trace.lock() {
                trace.add(trace::Action::CreatePipelineLayout(fid.id(), desc.clone()));
            }

            let layout = match device.create_pipeline_layout(desc, &hub.bind_group_layouts) {
                Ok(layout) => layout,
                Err(e) => break e,
            };

            let (id, _) = fid.assign(layout);
            api_log!("Device::create_pipeline_layout -> {id:?}");
            return (id, None);
        };

        let id =
fid.assign_error(desc.label.borrow_or_default());
        (id, Some(error))
    }

    /// Resolve the debug label recorded for `id`.
    pub fn pipeline_layout_label<A: HalApi>(&self, id: id::PipelineLayoutId) -> String {
        A::hub(self).pipeline_layouts.label_for_resource(id)
    }

    /// Drop the user's handle to `pipeline_layout_id`, deferring reclamation
    /// to the owning device's life tracker.
    pub fn pipeline_layout_drop<A: HalApi>(&self, pipeline_layout_id: id::PipelineLayoutId) {
        profiling::scope!("PipelineLayout::drop");
        api_log!("PipelineLayout::drop {pipeline_layout_id:?}");

        let hub = A::hub(self);
        if let Some(layout) = hub.pipeline_layouts.unregister(pipeline_layout_id) {
            layout
                .device
                .lock_life()
                .suspected_resources
                .pipeline_layouts
                .insert(pipeline_layout_id, layout.clone());
        }
    }

    /// Create a bind group on `device_id` described by `desc`, validating that
    /// the referenced layout belongs to the same device.
    pub fn device_create_bind_group<A: HalApi>(
        &self,
        device_id: DeviceId,
        desc: &binding_model::BindGroupDescriptor,
        id_in: Option<id::BindGroupId>,
    ) -> (id::BindGroupId, Option<binding_model::CreateBindGroupError>) {
        profiling::scope!("Device::create_bind_group");

        let hub = A::hub(self);
        let fid = hub.bind_groups.prepare(id_in);

        let error = loop {
            let device = match hub.devices.get(device_id) {
                Ok(device) => device,
                Err(_) => break DeviceError::Invalid.into(),
            };
            if !device.is_valid() {
                break DeviceError::Lost.into();
            }

            #[cfg(feature = "trace")]
            if let Some(ref mut trace) = *device.trace.lock() {
                trace.add(trace::Action::CreateBindGroup(fid.id(), desc.clone()));
            }

            let bind_group_layout = match hub.bind_group_layouts.get(desc.layout) {
                Ok(layout) => layout,
                Err(..) => break binding_model::CreateBindGroupError::InvalidLayout,
            };

            if bind_group_layout.device.as_info().id() != device.as_info().id() {
                break DeviceError::WrongDevice.into();
            }

            let bind_group = match device.create_bind_group(&bind_group_layout, desc, hub) {
                Ok(bind_group) => bind_group,
                Err(e) => break e,
            };

            let (id, resource) = fid.assign(bind_group);

            // Record a weak back-reference on every texture/buffer used by the
            // bind group so they can find the groups referencing them.
            let weak_ref = Arc::downgrade(&resource);
            for range in &resource.used_texture_ranges {
                range.texture.bind_groups.lock().push(weak_ref.clone());
            }
            for range in &resource.used_buffer_ranges {
                range.buffer.bind_groups.lock().push(weak_ref.clone());
            }

            api_log!("Device::create_bind_group -> {id:?}");

            device
                .trackers
                .lock()
                .bind_groups
                .insert_single(id, resource);
            return (id, None);
        };

        let id = fid.assign_error(desc.label.borrow_or_default());
        (id, Some(error))
    }

    /// Resolve the debug label recorded for `id`.
    pub fn bind_group_label<A: HalApi>(&self, id: id::BindGroupId) -> String {
        A::hub(self).bind_groups.label_for_resource(id)
    }

    /// Drop the user's handle to `bind_group_id`, deferring reclamation to the
    /// owning device's life tracker.
    pub fn bind_group_drop<A: HalApi>(&self, bind_group_id: id::BindGroupId) {
        profiling::scope!("BindGroup::drop");
        api_log!("BindGroup::drop {bind_group_id:?}");

        let hub = A::hub(self);

        if let Some(bind_group) = hub.bind_groups.unregister(bind_group_id) {
            bind_group
                .device
                .lock_life()
                .suspected_resources
                .bind_groups
                .insert(bind_group_id, bind_group.clone());
        }
    }

    /// Create a shader module on `device_id` from any supported `source`
    /// (WGSL / GLSL / SPIR-V / Naga IR, depending on enabled features).
    pub fn device_create_shader_module<A: HalApi>(
        &self,
        device_id: DeviceId,
        desc: &pipeline::ShaderModuleDescriptor,
        source: pipeline::ShaderModuleSource,
        id_in: Option<id::ShaderModuleId>,
    ) -> (
        id::ShaderModuleId,
        Option<pipeline::CreateShaderModuleError>,
    ) {
        profiling::scope!("Device::create_shader_module");

        let hub = A::hub(self);
        let fid = hub.shader_modules.prepare(id_in);

        let error = loop {
            let device = match hub.devices.get(device_id) {
                Ok(device) => device,
                Err(_) => break DeviceError::Invalid.into(),
            };
            if !device.is_valid() {
                break DeviceError::Lost.into();
            }

            // When tracing, serialize the shader source to a sidecar binary
            // file in a format matching its variant.
            #[cfg(feature = "trace")]
            if let Some(ref mut trace) = *device.trace.lock() {
                let data = match source {
                    #[cfg(feature = "wgsl")]
                    pipeline::ShaderModuleSource::Wgsl(ref code) => {
                        trace.make_binary("wgsl", code.as_bytes())
                    }
                    #[cfg(feature = "glsl")]
                    pipeline::ShaderModuleSource::Glsl(ref code, _) => {
                        trace.make_binary("glsl", code.as_bytes())
                    }
                    #[cfg(feature = "spirv")]
                    pipeline::ShaderModuleSource::SpirV(ref code, _) => {
                        trace.make_binary("spirv", bytemuck::cast_slice::<u32, u8>(code))
                    }
                    pipeline::ShaderModuleSource::Naga(ref module) => {
                        let string =
                            ron::ser::to_string_pretty(module, ron::ser::PrettyConfig::default())
                                .unwrap();
                        trace.make_binary("ron", string.as_bytes())
                    }
                    pipeline::ShaderModuleSource::Dummy(_) => {
                        panic!("found `ShaderModuleSource::Dummy`")
                    }
                };
                trace.add(trace::Action::CreateShaderModule {
                    id: fid.id(),
                    desc: desc.clone(),
                    data,
                });
            };

            let shader = match device.create_shader_module(desc, source) {
                Ok(shader) => shader,
                Err(e) => break e,
            };

            let (id, _) = fid.assign(shader);
            api_log!("Device::create_shader_module -> {id:?}");
            return (id, None);
        };

        log::error!("Device::create_shader_module error: {error}");

        let id = fid.assign_error(desc.label.borrow_or_default());
        (id, Some(error))
    }

    // Unsafe-ness of internal calls has little to do with unsafe-ness of this.
    #[allow(unused_unsafe)]
    /// # Safety
    ///
    /// This function passes SPIR-V binary to the backend as-is and can potentially result in a
    /// driver crash.
    pub unsafe fn device_create_shader_module_spirv<A: HalApi>(
        &self,
        device_id: DeviceId,
        desc: &pipeline::ShaderModuleDescriptor,
        source: Cow<[u32]>,
        id_in: Option<id::ShaderModuleId>,
    ) -> (
        id::ShaderModuleId,
        Option<pipeline::CreateShaderModuleError>,
    ) {
        profiling::scope!("Device::create_shader_module");

        let hub = A::hub(self);
        let fid = hub.shader_modules.prepare(id_in);

        let error = loop {
            let device = match hub.devices.get(device_id) {
                Ok(device) => device,
                Err(_) => break DeviceError::Invalid.into(),
            };
            if !device.is_valid() {
                break DeviceError::Lost.into();
            }

            #[cfg(feature = "trace")]
            if let Some(ref mut trace) = *device.trace.lock() {
                // SAFETY: reinterpreting the u32 words as bytes for the trace
                // file; length is source.len() * 4 bytes.
                let data = trace.make_binary("spv", unsafe {
                    std::slice::from_raw_parts(source.as_ptr() as *const u8, source.len() * 4)
                });
                trace.add(trace::Action::CreateShaderModule {
                    id: fid.id(),
                    desc: desc.clone(),
                    data,
                });
            };

            let shader = match unsafe { device.create_shader_module_spirv(desc, &source) } {
                Ok(shader) => shader,
                Err(e) => break e,
            };
            let (id, _) = fid.assign(shader);
            api_log!("Device::create_shader_module_spirv -> {id:?}");
            return (id, None);
        };

        log::error!("Device::create_shader_module_spirv error: {error}");

        let id = fid.assign_error(desc.label.borrow_or_default());
        (id, Some(error))
    }

    /// Resolve the debug label recorded for `id`.
    pub fn shader_module_label<A: HalApi>(&self, id: id::ShaderModuleId) -> String {
        A::hub(self).shader_modules.label_for_resource(id)
    }

    /// Drop the user's handle to `shader_module_id`; modules are simply
    /// unregistered (no deferred life-tracking needed).
    pub fn shader_module_drop<A: HalApi>(&self, shader_module_id: id::ShaderModuleId) {
        profiling::scope!("ShaderModule::drop");
        api_log!("ShaderModule::drop {shader_module_id:?}");

        let hub = A::hub(self);
        hub.shader_modules.unregister(shader_module_id);
    }

    /// Create a command encoder on `device_id`. Encoders share the command
    /// buffer registry, hence the id `transmute` at the boundaries.
    pub fn device_create_command_encoder<A: HalApi>(
        &self,
        device_id: DeviceId,
        desc: &wgt::CommandEncoderDescriptor<Label>,
        id_in: Option<id::CommandEncoderId>,
    ) -> (id::CommandEncoderId, Option<DeviceError>) {
        profiling::scope!("Device::create_command_encoder");

        let hub = A::hub(self);
        let fid = hub.command_buffers.prepare(id_in.map(|id| id.transmute()));

        let error = loop {
            let device = match hub.devices.get(device_id) {
                Ok(device) => device,
                Err(_) => break DeviceError::Invalid,
            };
            if !device.is_valid() {
                break DeviceError::Lost;
            }
            let queue = match hub.queues.get(device.queue_id.read().unwrap()) {
                Ok(queue) => queue,
                Err(_) => break DeviceError::InvalidQueueId,
            };
            let encoder = match device
                .command_allocator
                .lock()
                .as_mut()
                .unwrap()
                .acquire_encoder(device.raw(), queue.raw.as_ref().unwrap())
            {
                Ok(raw) => raw,
                Err(_) => break DeviceError::OutOfMemory,
            };
            let command_buffer = command::CommandBuffer::new(
                encoder,
                &device,
                #[cfg(feature = "trace")]
                device.trace.lock().is_some(),
                desc.label
                    .to_hal(device.instance_flags)
                    .map(|s| s.to_string()),
            );

            let (id, _) = fid.assign(command_buffer);
            api_log!("Device::create_command_encoder -> {id:?}");
            return (id.transmute(), None);
        };

        let id = fid.assign_error(desc.label.borrow_or_default());
        (id.transmute(), Some(error))
    }

    /// Resolve the debug label recorded for `id`.
    pub fn command_buffer_label<A: HalApi>(&self, id: id::CommandBufferId) -> String {
        A::hub(self).command_buffers.label_for_resource(id)
    }

    /// Drop a command encoder, releasing its tracked resources from the device.
    pub fn command_encoder_drop<A: HalApi>(&self, command_encoder_id: id::CommandEncoderId) {
        profiling::scope!("CommandEncoder::drop");
        api_log!("CommandEncoder::drop {command_encoder_id:?}");

        let hub = A::hub(self);

        if let Some(cmd_buf) = hub
            .command_buffers
            .unregister(command_encoder_id.transmute())
        {
            cmd_buf
                .device
                .untrack(&cmd_buf.data.lock().as_ref().unwrap().trackers);
        }
    }

    /// Drop a command buffer; identical to dropping its encoder alias.
    pub fn command_buffer_drop<A: HalApi>(&self, command_buffer_id: id::CommandBufferId) {
        profiling::scope!("CommandBuffer::drop");
        api_log!("CommandBuffer::drop {command_buffer_id:?}");
        self.command_encoder_drop::<A>(command_buffer_id.transmute())
    }

    /// Create a render bundle encoder for `device_id`. Returns a raw boxed
    /// pointer (FFI-style ownership transfer); on error a dummy encoder is
    /// returned alongside the error.
    pub fn device_create_render_bundle_encoder(
        &self,
        device_id: DeviceId,
        desc: &command::RenderBundleEncoderDescriptor,
    ) -> (
        *mut command::RenderBundleEncoder,
        Option<command::CreateRenderBundleError>,
    ) {
        profiling::scope!("Device::create_render_bundle_encoder");
        api_log!("Device::device_create_render_bundle_encoder");
        let (encoder, error) = match command::RenderBundleEncoder::new(desc, device_id, None) {
            Ok(encoder) => (encoder, None),
            Err(e) => (command::RenderBundleEncoder::dummy(device_id), Some(e)),
        };
        (Box::into_raw(Box::new(encoder)), error)
    }

    /// Finish recording `bundle_encoder` into an immutable render bundle.
    pub fn render_bundle_encoder_finish<A: HalApi>(
        &self,
        bundle_encoder: command::RenderBundleEncoder,
        desc: &command::RenderBundleDescriptor,
        id_in: Option<id::RenderBundleId>,
    ) -> (id::RenderBundleId, Option<command::RenderBundleError>) {
        profiling::scope!("RenderBundleEncoder::finish");

        let hub = A::hub(self);

        let fid = hub.render_bundles.prepare(id_in);

        let error = loop {
            let device = match hub.devices.get(bundle_encoder.parent()) {
                Ok(device) => device,
                Err(_) => break command::RenderBundleError::INVALID_DEVICE,
            };
            if !device.is_valid() {
                break command::RenderBundleError::INVALID_DEVICE;
            }

            #[cfg(feature = "trace")]
            if let Some(ref mut trace) = *device.trace.lock() {
                trace.add(trace::Action::CreateRenderBundle {
                    id: fid.id(),
                    desc: trace::new_render_bundle_encoder_descriptor(
                        desc.label.clone(),
                        &bundle_encoder.context,
                        bundle_encoder.is_depth_read_only,
                        bundle_encoder.is_stencil_read_only,
                    ),
                    base: bundle_encoder.to_base_pass(),
                });
            }

            let render_bundle = match bundle_encoder.finish(desc, &device, hub) {
                Ok(bundle) => bundle,
                Err(e) => break e,
            };

            let (id, resource) = fid.assign(render_bundle);
            api_log!("RenderBundleEncoder::finish -> {id:?}");
            device.trackers.lock().bundles.insert_single(id, resource);
            return (id, None);
        };

        let id = fid.assign_error(desc.label.borrow_or_default());
        (id,
Some(error))
    }

    /// Resolve the debug label recorded for `id`.
    pub fn render_bundle_label<A: HalApi>(&self, id: id::RenderBundleId) -> String {
        A::hub(self).render_bundles.label_for_resource(id)
    }

    /// Drop the user's handle to `render_bundle_id`, deferring reclamation to
    /// the owning device's life tracker.
    pub fn render_bundle_drop<A: HalApi>(&self, render_bundle_id: id::RenderBundleId) {
        profiling::scope!("RenderBundle::drop");
        api_log!("RenderBundle::drop {render_bundle_id:?}");

        let hub = A::hub(self);

        if let Some(bundle) = hub.render_bundles.unregister(render_bundle_id) {
            bundle
                .device
                .lock_life()
                .suspected_resources
                .render_bundles
                .insert(render_bundle_id, bundle.clone());
        }
    }

    /// Create a query set on `device_id` described by `desc`.
    pub fn device_create_query_set<A: HalApi>(
        &self,
        device_id: DeviceId,
        desc: &resource::QuerySetDescriptor,
        id_in: Option<id::QuerySetId>,
    ) -> (id::QuerySetId, Option<resource::CreateQuerySetError>) {
        profiling::scope!("Device::create_query_set");

        let hub = A::hub(self);
        let fid = hub.query_sets.prepare(id_in);

        let error = loop {
            let device = match hub.devices.get(device_id) {
                Ok(device) => device,
                Err(_) => break DeviceError::Invalid.into(),
            };
            if !device.is_valid() {
                break DeviceError::Lost.into();
            }

            #[cfg(feature = "trace")]
            if let Some(ref mut trace) = *device.trace.lock() {
                trace.add(trace::Action::CreateQuerySet {
                    id: fid.id(),
                    desc: desc.clone(),
                });
            }

            let query_set = match device.create_query_set(desc) {
                Ok(query_set) => query_set,
                Err(err) => break err,
            };

            let (id, resource) = fid.assign(query_set);
            api_log!("Device::create_query_set -> {id:?}");
            device
                .trackers
                .lock()
                .query_sets
                .insert_single(id, resource);

            return (id, None);
        };

        // Query sets use an empty error label (no label in the descriptor path).
        let id = fid.assign_error("");
        (id, Some(error))
    }

    /// Drop the user's handle to `query_set_id`, recording the destruction in
    /// the trace (if any) and deferring reclamation to the life tracker.
    pub fn query_set_drop<A: HalApi>(&self, query_set_id: id::QuerySetId) {
        profiling::scope!("QuerySet::drop");
        api_log!("QuerySet::drop {query_set_id:?}");

        let hub = A::hub(self);

        if let Some(query_set) = hub.query_sets.unregister(query_set_id) {
            let device = &query_set.device;

            #[cfg(feature = "trace")]
            if let Some(ref mut trace) = *device.trace.lock() {
                trace.add(trace::Action::DestroyQuerySet(query_set_id));
            }

            device
                .lock_life()
                .suspected_resources
                .query_sets
                .insert(query_set_id, query_set.clone());
        }
    }

    /// Resolve the debug label recorded for `id`.
    pub fn query_set_label<A: HalApi>(&self, id: id::QuerySetId) -> String {
        A::hub(self).query_sets.label_for_resource(id)
    }

    /// Create a render pipeline on `device_id`. When `implicit_pipeline_ids`
    /// is given, the pipeline layout and bind group layouts are derived and
    /// registered under those ids; on failure they are registered as errors.
    pub fn device_create_render_pipeline<A: HalApi>(
        &self,
        device_id: DeviceId,
        desc: &pipeline::RenderPipelineDescriptor,
        id_in: Option<id::RenderPipelineId>,
        implicit_pipeline_ids: Option<ImplicitPipelineIds<'_>>,
    ) -> (
        id::RenderPipelineId,
        Option<pipeline::CreateRenderPipelineError>,
    ) {
        profiling::scope!("Device::create_render_pipeline");

        let hub = A::hub(self);

        let fid = hub.render_pipelines.prepare(id_in);
        let implicit_context = implicit_pipeline_ids.map(|ipi| ipi.prepare(hub));
        // Keep a copy for the error path: `implicit_context` is moved into
        // `create_render_pipeline` below.
        let implicit_error_context = implicit_context.clone();

        let error = loop {
            let device = match hub.devices.get(device_id) {
                Ok(device) => device,
                Err(_) => break DeviceError::Invalid.into(),
            };
            if !device.is_valid() {
                break DeviceError::Lost.into();
            }
            #[cfg(feature = "trace")]
            if let Some(ref mut trace) = *device.trace.lock() {
                trace.add(trace::Action::CreateRenderPipeline {
                    id: fid.id(),
                    desc: desc.clone(),
                    implicit_context: implicit_context.clone(),
                });
            }

            let pipeline =
                match device.create_render_pipeline(&device.adapter, desc, implicit_context, hub) {
                    Ok(pair) => pair,
                    Err(e) => break e,
                };

            let (id, resource) = fid.assign(pipeline);
            api_log!("Device::create_render_pipeline -> {id:?}");

            device
                .trackers
                .lock()
                .render_pipelines
                .insert_single(id, resource);

            return (id, None);
        };

        let id = fid.assign_error(desc.label.borrow_or_default());

        // We also need to assign errors to the implicit pipeline layout and the
        // implicit bind group layout. We have to remove any existing entries first.
        let mut pipeline_layout_guard = hub.pipeline_layouts.write();
        let mut bgl_guard = hub.bind_group_layouts.write();
        if let Some(ref ids) = implicit_error_context {
            if pipeline_layout_guard.contains(ids.root_id) {
                pipeline_layout_guard.remove(ids.root_id);
            }
            pipeline_layout_guard.insert_error(ids.root_id, IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL);
            for &bgl_id in ids.group_ids.iter() {
                if bgl_guard.contains(bgl_id) {
                    bgl_guard.remove(bgl_id);
                }
                bgl_guard.insert_error(bgl_id, IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL);
            }
        }

        log::error!("Device::create_render_pipeline error: {error}");

        (id, Some(error))
    }

    /// Get an ID of one of the bind group layouts. The ID adds a refcount,
    /// which needs to be released by calling `bind_group_layout_drop`.
    pub fn render_pipeline_get_bind_group_layout<A: HalApi>(
        &self,
        pipeline_id: id::RenderPipelineId,
        index: u32,
        id_in: Option<id::BindGroupLayoutId>,
    ) -> (
        id::BindGroupLayoutId,
        Option<binding_model::GetBindGroupLayoutError>,
    ) {
        let hub = A::hub(self);

        let error = loop {
            let pipeline = match hub.render_pipelines.get(pipeline_id) {
                Ok(pipeline) => pipeline,
                Err(_) => break binding_model::GetBindGroupLayoutError::InvalidPipeline,
            };
            let id = match pipeline.layout.bind_group_layouts.get(index as usize) {
                Some(bg) => hub.bind_group_layouts.prepare(id_in).assign_existing(bg),
                None => break binding_model::GetBindGroupLayoutError::InvalidGroupIndex(index),
            };
            return (id, None);
        };

        let id = hub
            .bind_group_layouts
            .prepare(id_in)
            .assign_error("<derived>");
        (id, Some(error))
    }

    /// Resolve the debug label recorded for `id`.
    pub fn render_pipeline_label<A: HalApi>(&self, id: id::RenderPipelineId) -> String {
        A::hub(self).render_pipelines.label_for_resource(id)
    }

    /// Drop the user's handle to `render_pipeline_id`.
    pub fn render_pipeline_drop<A: HalApi>(&self, render_pipeline_id: id::RenderPipelineId) {
        profiling::scope!("RenderPipeline::drop");
        api_log!("RenderPipeline::drop {render_pipeline_id:?}");

        let hub = A::hub(self);
+ + if let Some(pipeline) = hub.render_pipelines.unregister(render_pipeline_id) { + let layout_id = pipeline.layout.as_info().id(); + let device = &pipeline.device; + let mut life_lock = device.lock_life(); + life_lock + .suspected_resources + .render_pipelines + .insert(render_pipeline_id, pipeline.clone()); + + life_lock + .suspected_resources + .pipeline_layouts + .insert(layout_id, pipeline.layout.clone()); + } + } + + pub fn device_create_compute_pipeline<A: HalApi>( + &self, + device_id: DeviceId, + desc: &pipeline::ComputePipelineDescriptor, + id_in: Option<id::ComputePipelineId>, + implicit_pipeline_ids: Option<ImplicitPipelineIds<'_>>, + ) -> ( + id::ComputePipelineId, + Option<pipeline::CreateComputePipelineError>, + ) { + profiling::scope!("Device::create_compute_pipeline"); + + let hub = A::hub(self); + + let fid = hub.compute_pipelines.prepare(id_in); + let implicit_context = implicit_pipeline_ids.map(|ipi| ipi.prepare(hub)); + let implicit_error_context = implicit_context.clone(); + + let error = loop { + let device = match hub.devices.get(device_id) { + Ok(device) => device, + Err(_) => break DeviceError::Invalid.into(), + }; + if !device.is_valid() { + break DeviceError::Lost.into(); + } + + #[cfg(feature = "trace")] + if let Some(ref mut trace) = *device.trace.lock() { + trace.add(trace::Action::CreateComputePipeline { + id: fid.id(), + desc: desc.clone(), + implicit_context: implicit_context.clone(), + }); + } + let pipeline = match device.create_compute_pipeline(desc, implicit_context, hub) { + Ok(pair) => pair, + Err(e) => break e, + }; + + let (id, resource) = fid.assign(pipeline); + api_log!("Device::create_compute_pipeline -> {id:?}"); + + device + .trackers + .lock() + .compute_pipelines + .insert_single(id, resource); + return (id, None); + }; + + let id = fid.assign_error(desc.label.borrow_or_default()); + + // We also need to assign errors to the implicit pipeline layout and the + // implicit bind group layout. 
We have to remove any existing entries first. + let mut pipeline_layout_guard = hub.pipeline_layouts.write(); + let mut bgl_guard = hub.bind_group_layouts.write(); + if let Some(ref ids) = implicit_error_context { + if pipeline_layout_guard.contains(ids.root_id) { + pipeline_layout_guard.remove(ids.root_id); + } + pipeline_layout_guard.insert_error(ids.root_id, IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL); + for &bgl_id in ids.group_ids.iter() { + if bgl_guard.contains(bgl_id) { + bgl_guard.remove(bgl_id); + } + bgl_guard.insert_error(bgl_id, IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL); + } + } + (id, Some(error)) + } + + /// Get an ID of one of the bind group layouts. The ID adds a refcount, + /// which needs to be released by calling `bind_group_layout_drop`. + pub fn compute_pipeline_get_bind_group_layout<A: HalApi>( + &self, + pipeline_id: id::ComputePipelineId, + index: u32, + id_in: Option<id::BindGroupLayoutId>, + ) -> ( + id::BindGroupLayoutId, + Option<binding_model::GetBindGroupLayoutError>, + ) { + let hub = A::hub(self); + + let error = loop { + let pipeline = match hub.compute_pipelines.get(pipeline_id) { + Ok(pipeline) => pipeline, + Err(_) => break binding_model::GetBindGroupLayoutError::InvalidPipeline, + }; + + let id = match pipeline.layout.bind_group_layouts.get(index as usize) { + Some(bg) => hub.bind_group_layouts.prepare(id_in).assign_existing(bg), + None => break binding_model::GetBindGroupLayoutError::InvalidGroupIndex(index), + }; + + return (id, None); + }; + + let id = hub + .bind_group_layouts + .prepare(id_in) + .assign_error("<derived>"); + (id, Some(error)) + } + + pub fn compute_pipeline_label<A: HalApi>(&self, id: id::ComputePipelineId) -> String { + A::hub(self).compute_pipelines.label_for_resource(id) + } + + pub fn compute_pipeline_drop<A: HalApi>(&self, compute_pipeline_id: id::ComputePipelineId) { + profiling::scope!("ComputePipeline::drop"); + api_log!("ComputePipeline::drop {compute_pipeline_id:?}"); + + let hub = A::hub(self); + + if 
let Some(pipeline) = hub.compute_pipelines.unregister(compute_pipeline_id) { + let layout_id = pipeline.layout.as_info().id(); + let device = &pipeline.device; + let mut life_lock = device.lock_life(); + life_lock + .suspected_resources + .compute_pipelines + .insert(compute_pipeline_id, pipeline.clone()); + life_lock + .suspected_resources + .pipeline_layouts + .insert(layout_id, pipeline.layout.clone()); + } + } + + pub fn surface_configure<A: HalApi>( + &self, + surface_id: SurfaceId, + device_id: DeviceId, + config: &wgt::SurfaceConfiguration<Vec<TextureFormat>>, + ) -> Option<present::ConfigureSurfaceError> { + use hal::{Adapter as _, Surface as _}; + use present::ConfigureSurfaceError as E; + profiling::scope!("surface_configure"); + + fn validate_surface_configuration( + config: &mut hal::SurfaceConfiguration, + caps: &hal::SurfaceCapabilities, + max_texture_dimension_2d: u32, + ) -> Result<(), E> { + let width = config.extent.width; + let height = config.extent.height; + + if width > max_texture_dimension_2d || height > max_texture_dimension_2d { + return Err(E::TooLarge { + width, + height, + max_texture_dimension_2d, + }); + } + + if !caps.present_modes.contains(&config.present_mode) { + let new_mode = 'b: loop { + // Automatic present mode checks. + // + // The "Automatic" modes are never supported by the backends. + let fallbacks = match config.present_mode { + wgt::PresentMode::AutoVsync => { + &[wgt::PresentMode::FifoRelaxed, wgt::PresentMode::Fifo][..] 
+ } + // Always end in FIFO to make sure it's always supported + wgt::PresentMode::AutoNoVsync => &[ + wgt::PresentMode::Immediate, + wgt::PresentMode::Mailbox, + wgt::PresentMode::Fifo, + ][..], + _ => { + return Err(E::UnsupportedPresentMode { + requested: config.present_mode, + available: caps.present_modes.clone(), + }); + } + }; + + for &fallback in fallbacks { + if caps.present_modes.contains(&fallback) { + break 'b fallback; + } + } + + unreachable!("Fallback system failed to choose present mode. This is a bug. Mode: {:?}, Options: {:?}", config.present_mode, &caps.present_modes); + }; + + api_log!( + "Automatically choosing presentation mode by rule {:?}. Chose {new_mode:?}", + config.present_mode + ); + config.present_mode = new_mode; + } + if !caps.formats.contains(&config.format) { + return Err(E::UnsupportedFormat { + requested: config.format, + available: caps.formats.clone(), + }); + } + if !caps + .composite_alpha_modes + .contains(&config.composite_alpha_mode) + { + let new_alpha_mode = 'alpha: loop { + // Automatic alpha mode checks. + let fallbacks = match config.composite_alpha_mode { + wgt::CompositeAlphaMode::Auto => &[ + wgt::CompositeAlphaMode::Opaque, + wgt::CompositeAlphaMode::Inherit, + ][..], + _ => { + return Err(E::UnsupportedAlphaMode { + requested: config.composite_alpha_mode, + available: caps.composite_alpha_modes.clone(), + }); + } + }; + + for &fallback in fallbacks { + if caps.composite_alpha_modes.contains(&fallback) { + break 'alpha fallback; + } + } + + unreachable!( + "Fallback system failed to choose alpha mode. This is a bug. \ + AlphaMode: {:?}, Options: {:?}", + config.composite_alpha_mode, &caps.composite_alpha_modes + ); + }; + + api_log!( + "Automatically choosing alpha mode by rule {:?}. 
Chose {new_alpha_mode:?}", + config.composite_alpha_mode + ); + config.composite_alpha_mode = new_alpha_mode; + } + if !caps.usage.contains(config.usage) { + return Err(E::UnsupportedUsage); + } + if width == 0 || height == 0 { + return Err(E::ZeroArea); + } + Ok(()) + } + + log::debug!("configuring surface with {:?}", config); + + let error = 'outer: loop { + // User callbacks must not be called while we are holding locks. + let user_callbacks; + { + let hub = A::hub(self); + let surface_guard = self.surfaces.read(); + let device_guard = hub.devices.read(); + + let device = match device_guard.get(device_id) { + Ok(device) => device, + Err(_) => break DeviceError::Invalid.into(), + }; + if !device.is_valid() { + break DeviceError::Lost.into(); + } + + #[cfg(feature = "trace")] + if let Some(ref mut trace) = *device.trace.lock() { + trace.add(trace::Action::ConfigureSurface(surface_id, config.clone())); + } + + let surface = match surface_guard.get(surface_id) { + Ok(surface) => surface, + Err(_) => break E::InvalidSurface, + }; + + let caps = unsafe { + let suf = A::get_surface(surface); + let adapter = &device.adapter; + match adapter.raw.adapter.surface_capabilities(suf.unwrap()) { + Some(caps) => caps, + None => break E::UnsupportedQueueFamily, + } + }; + + let mut hal_view_formats = vec![]; + for format in config.view_formats.iter() { + if *format == config.format { + continue; + } + if !caps.formats.contains(&config.format) { + break 'outer E::UnsupportedFormat { + requested: config.format, + available: caps.formats, + }; + } + if config.format.remove_srgb_suffix() != format.remove_srgb_suffix() { + break 'outer E::InvalidViewFormat(*format, config.format); + } + hal_view_formats.push(*format); + } + + if !hal_view_formats.is_empty() { + if let Err(missing_flag) = + device.require_downlevel_flags(wgt::DownlevelFlags::SURFACE_VIEW_FORMATS) + { + break 'outer E::MissingDownlevelFlags(missing_flag); + } + } + + let maximum_frame_latency = 
config.desired_maximum_frame_latency.clamp( + *caps.maximum_frame_latency.start(), + *caps.maximum_frame_latency.end(), + ); + let mut hal_config = hal::SurfaceConfiguration { + maximum_frame_latency, + present_mode: config.present_mode, + composite_alpha_mode: config.alpha_mode, + format: config.format, + extent: wgt::Extent3d { + width: config.width, + height: config.height, + depth_or_array_layers: 1, + }, + usage: conv::map_texture_usage(config.usage, hal::FormatAspects::COLOR), + view_formats: hal_view_formats, + }; + + if let Err(error) = validate_surface_configuration( + &mut hal_config, + &caps, + device.limits.max_texture_dimension_2d, + ) { + break error; + } + + // Wait for all work to finish before configuring the surface. + let fence = device.fence.read(); + let fence = fence.as_ref().unwrap(); + match device.maintain(fence, wgt::Maintain::Wait) { + Ok((closures, _)) => { + user_callbacks = closures; + } + Err(e) => { + break e.into(); + } + } + + // All textures must be destroyed before the surface can be re-configured. + if let Some(present) = surface.presentation.lock().take() { + if present.acquired_texture.is_some() { + break E::PreviousOutputExists; + } + } + + // TODO: Texture views may still be alive that point to the texture. + // this will allow the user to render to the surface texture, long after + // it has been removed. 
+ // + // https://github.com/gfx-rs/wgpu/issues/4105 + + match unsafe { + A::get_surface(surface) + .unwrap() + .configure(device.raw(), &hal_config) + } { + Ok(()) => (), + Err(error) => { + break match error { + hal::SurfaceError::Outdated | hal::SurfaceError::Lost => { + E::InvalidSurface + } + hal::SurfaceError::Device(error) => E::Device(error.into()), + hal::SurfaceError::Other(message) => { + log::error!("surface configuration failed: {}", message); + E::InvalidSurface + } + } + } + } + + let mut presentation = surface.presentation.lock(); + *presentation = Some(present::Presentation { + device: super::any_device::AnyDevice::new(device.clone()), + config: config.clone(), + acquired_texture: None, + }); + } + + user_callbacks.fire(); + return None; + }; + + Some(error) + } + + #[cfg(feature = "replay")] + /// Only triage suspected resource IDs. This helps us to avoid ID collisions + /// upon creating new resources when re-playing a trace. + pub fn device_maintain_ids<A: HalApi>(&self, device_id: DeviceId) -> Result<(), InvalidDevice> { + let hub = A::hub(self); + + let device = hub.devices.get(device_id).map_err(|_| InvalidDevice)?; + if !device.is_valid() { + return Err(InvalidDevice); + } + device.lock_life().triage_suspected(&device.trackers); + Ok(()) + } + + /// Check `device_id` for freeable resources and completed buffer mappings. + /// + /// Return `queue_empty` indicating whether there are more queue submissions still in flight. 
+ pub fn device_poll<A: HalApi>( + &self, + device_id: DeviceId, + maintain: wgt::Maintain<queue::WrappedSubmissionIndex>, + ) -> Result<bool, WaitIdleError> { + api_log!("Device::poll"); + + let hub = A::hub(self); + let device = hub + .devices + .get(device_id) + .map_err(|_| DeviceError::Invalid)?; + + let (closures, queue_empty) = { + if let wgt::Maintain::WaitForSubmissionIndex(submission_index) = maintain { + if submission_index.queue_id != device_id.transmute() { + return Err(WaitIdleError::WrongSubmissionIndex( + submission_index.queue_id, + device_id, + )); + } + } + + let fence = device.fence.read(); + let fence = fence.as_ref().unwrap(); + device.maintain(fence, maintain)? + }; + + // Some deferred destroys are scheduled in maintain so run this right after + // to avoid holding on to them until the next device poll. + device.deferred_resource_destruction(); + + closures.fire(); + + Ok(queue_empty) + } + + /// Poll all devices belonging to the backend `A`. + /// + /// If `force_wait` is true, block until all buffer mappings are done. + /// + /// Return `all_queue_empty` indicating whether there are more queue + /// submissions still in flight. + fn poll_device<A: HalApi>( + &self, + force_wait: bool, + closures: &mut UserClosures, + ) -> Result<bool, WaitIdleError> { + profiling::scope!("poll_device"); + + let hub = A::hub(self); + let mut all_queue_empty = true; + { + let device_guard = hub.devices.read(); + + for (_id, device) in device_guard.iter(A::VARIANT) { + let maintain = if force_wait { + wgt::Maintain::Wait + } else { + wgt::Maintain::Poll + }; + let fence = device.fence.read(); + let fence = fence.as_ref().unwrap(); + let (cbs, queue_empty) = device.maintain(fence, maintain)?; + all_queue_empty = all_queue_empty && queue_empty; + + closures.extend(cbs); + } + } + + Ok(all_queue_empty) + } + + /// Poll all devices on all backends. + /// + /// This is the implementation of `wgpu::Instance::poll_all`. 
+ /// + /// Return `all_queue_empty` indicating whether there are more queue + /// submissions still in flight. + pub fn poll_all_devices(&self, force_wait: bool) -> Result<bool, WaitIdleError> { + api_log!("poll_all_devices"); + let mut closures = UserClosures::default(); + let mut all_queue_empty = true; + + #[cfg(vulkan)] + { + all_queue_empty = + self.poll_device::<hal::api::Vulkan>(force_wait, &mut closures)? && all_queue_empty; + } + #[cfg(metal)] + { + all_queue_empty = + self.poll_device::<hal::api::Metal>(force_wait, &mut closures)? && all_queue_empty; + } + #[cfg(dx12)] + { + all_queue_empty = + self.poll_device::<hal::api::Dx12>(force_wait, &mut closures)? && all_queue_empty; + } + #[cfg(gles)] + { + all_queue_empty = + self.poll_device::<hal::api::Gles>(force_wait, &mut closures)? && all_queue_empty; + } + + closures.fire(); + + Ok(all_queue_empty) + } + + pub fn device_label<A: HalApi>(&self, id: DeviceId) -> String { + A::hub(self).devices.label_for_resource(id) + } + + pub fn device_start_capture<A: HalApi>(&self, id: DeviceId) { + api_log!("Device::start_capture"); + + let hub = A::hub(self); + + if let Ok(device) = hub.devices.get(id) { + if !device.is_valid() { + return; + } + unsafe { device.raw().start_capture() }; + } + } + + pub fn device_stop_capture<A: HalApi>(&self, id: DeviceId) { + api_log!("Device::stop_capture"); + + let hub = A::hub(self); + + if let Ok(device) = hub.devices.get(id) { + if !device.is_valid() { + return; + } + unsafe { device.raw().stop_capture() }; + } + } + + pub fn device_drop<A: HalApi>(&self, device_id: DeviceId) { + profiling::scope!("Device::drop"); + api_log!("Device::drop {device_id:?}"); + + let hub = A::hub(self); + if let Some(device) = hub.devices.unregister(device_id) { + let device_lost_closure = device.lock_life().device_lost_closure.take(); + if let Some(closure) = device_lost_closure { + closure.call(DeviceLostReason::Dropped, String::from("Device dropped.")); + } + + // The things 
`Device::prepare_to_die` takes care are mostly + // unnecessary here. We know our queue is empty, so we don't + // need to wait for submissions or triage them. We know we were + // just polled, so `life_tracker.free_resources` is empty. + debug_assert!(device.lock_life().queue_empty()); + { + let mut pending_writes = device.pending_writes.lock(); + let pending_writes = pending_writes.as_mut().unwrap(); + pending_writes.deactivate(); + } + + drop(device); + } + } + + // This closure will be called exactly once during "lose the device", + // or when it is replaced. + pub fn device_set_device_lost_closure<A: HalApi>( + &self, + device_id: DeviceId, + device_lost_closure: DeviceLostClosure, + ) { + let hub = A::hub(self); + + if let Ok(device) = hub.devices.get(device_id) { + let mut life_tracker = device.lock_life(); + if let Some(existing_closure) = life_tracker.device_lost_closure.take() { + // It's important to not hold the lock while calling the closure. + drop(life_tracker); + existing_closure.call(DeviceLostReason::ReplacedCallback, "".to_string()); + life_tracker = device.lock_life(); + } + life_tracker.device_lost_closure = Some(device_lost_closure); + } + } + + pub fn device_destroy<A: HalApi>(&self, device_id: DeviceId) { + api_log!("Device::destroy {device_id:?}"); + + let hub = A::hub(self); + + if let Ok(device) = hub.devices.get(device_id) { + // Follow the steps at + // https://gpuweb.github.io/gpuweb/#dom-gpudevice-destroy. + // It's legal to call destroy multiple times, but if the device + // is already invalid, there's nothing more to do. There's also + // no need to return an error. + if !device.is_valid() { + return; + } + + // The last part of destroy is to lose the device. The spec says + // delay that until all "currently-enqueued operations on any + // queue on this device are completed." This is accomplished by + // setting valid to false, and then relying upon maintain to + // check for empty queues and a DeviceLostClosure. 
At that time, + // the DeviceLostClosure will be called with "destroyed" as the + // reason. + device.valid.store(false, Ordering::Relaxed); + } + } + + pub fn device_mark_lost<A: HalApi>(&self, device_id: DeviceId, message: &str) { + api_log!("Device::mark_lost {device_id:?}"); + + let hub = A::hub(self); + + if let Ok(device) = hub.devices.get(device_id) { + device.lose(message); + } + } + + pub fn queue_drop<A: HalApi>(&self, queue_id: QueueId) { + profiling::scope!("Queue::drop"); + api_log!("Queue::drop {queue_id:?}"); + + let hub = A::hub(self); + if let Some(queue) = hub.queues.unregister(queue_id) { + drop(queue); + } + } + + pub fn buffer_map_async<A: HalApi>( + &self, + buffer_id: id::BufferId, + range: Range<BufferAddress>, + op: BufferMapOperation, + ) -> BufferAccessResult { + api_log!("Buffer::map_async {buffer_id:?} range {range:?} op: {op:?}"); + + // User callbacks must not be called while holding buffer_map_async_inner's locks, so we + // defer the error callback if it needs to be called immediately (typically when running + // into errors). + if let Err((mut operation, err)) = self.buffer_map_async_inner::<A>(buffer_id, range, op) { + if let Some(callback) = operation.callback.take() { + callback.call(Err(err.clone())); + } + log::error!("Buffer::map_async error: {err}"); + return Err(err); + } + + Ok(()) + } + + // Returns the mapping callback in case of error so that the callback can be fired outside + // of the locks that are held in this function. 
+ fn buffer_map_async_inner<A: HalApi>( + &self, + buffer_id: id::BufferId, + range: Range<BufferAddress>, + op: BufferMapOperation, + ) -> Result<(), (BufferMapOperation, BufferAccessError)> { + profiling::scope!("Buffer::map_async"); + + let hub = A::hub(self); + + let (pub_usage, internal_use) = match op.host { + HostMap::Read => (wgt::BufferUsages::MAP_READ, hal::BufferUses::MAP_READ), + HostMap::Write => (wgt::BufferUsages::MAP_WRITE, hal::BufferUses::MAP_WRITE), + }; + + if range.start % wgt::MAP_ALIGNMENT != 0 || range.end % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err((op, BufferAccessError::UnalignedRange)); + } + + let buffer = { + let buffer = hub + .buffers + .get(buffer_id) + .map_err(|_| BufferAccessError::Invalid); + + let buffer = match buffer { + Ok(b) => b, + Err(e) => { + return Err((op, e)); + } + }; + + let device = &buffer.device; + if !device.is_valid() { + return Err((op, DeviceError::Lost.into())); + } + + if let Err(e) = check_buffer_usage(buffer.usage, pub_usage) { + return Err((op, e.into())); + } + + if range.start > range.end { + return Err(( + op, + BufferAccessError::NegativeRange { + start: range.start, + end: range.end, + }, + )); + } + if range.end > buffer.size { + return Err(( + op, + BufferAccessError::OutOfBoundsOverrun { + index: range.end, + max: buffer.size, + }, + )); + } + + let snatch_guard = device.snatchable_lock.read(); + if buffer.is_destroyed(&snatch_guard) { + return Err((op, BufferAccessError::Destroyed)); + } + + { + let map_state = &mut *buffer.map_state.lock(); + *map_state = match *map_state { + resource::BufferMapState::Init { .. } + | resource::BufferMapState::Active { .. 
} => { + return Err((op, BufferAccessError::AlreadyMapped)); + } + resource::BufferMapState::Waiting(_) => { + return Err((op, BufferAccessError::MapAlreadyPending)); + } + resource::BufferMapState::Idle => { + resource::BufferMapState::Waiting(resource::BufferPendingMapping { + range, + op, + _parent_buffer: buffer.clone(), + }) + } + }; + } + + { + let mut trackers = buffer.device.as_ref().trackers.lock(); + trackers.buffers.set_single(&buffer, internal_use); + //TODO: Check if draining ALL buffers is correct! + let _ = trackers.buffers.drain_transitions(&snatch_guard); + } + + drop(snatch_guard); + + buffer + }; + + buffer.device.lock_life().map(&buffer); + + Ok(()) + } + + pub fn buffer_get_mapped_range<A: HalApi>( + &self, + buffer_id: id::BufferId, + offset: BufferAddress, + size: Option<BufferAddress>, + ) -> Result<(*mut u8, u64), BufferAccessError> { + profiling::scope!("Buffer::get_mapped_range"); + api_log!("Buffer::get_mapped_range {buffer_id:?} offset {offset:?} size {size:?}"); + + let hub = A::hub(self); + + let buffer = hub + .buffers + .get(buffer_id) + .map_err(|_| BufferAccessError::Invalid)?; + + { + let snatch_guard = buffer.device.snatchable_lock.read(); + if buffer.is_destroyed(&snatch_guard) { + return Err(BufferAccessError::Destroyed); + } + } + + let range_size = if let Some(size) = size { + size + } else if offset > buffer.size { + 0 + } else { + buffer.size - offset + }; + + if offset % wgt::MAP_ALIGNMENT != 0 { + return Err(BufferAccessError::UnalignedOffset { offset }); + } + if range_size % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(BufferAccessError::UnalignedRangeSize { range_size }); + } + let map_state = &*buffer.map_state.lock(); + match *map_state { + resource::BufferMapState::Init { ref ptr, .. 
} => { + // offset (u64) can not be < 0, so no need to validate the lower bound + if offset + range_size > buffer.size { + return Err(BufferAccessError::OutOfBoundsOverrun { + index: offset + range_size - 1, + max: buffer.size, + }); + } + unsafe { Ok((ptr.as_ptr().offset(offset as isize), range_size)) } + } + resource::BufferMapState::Active { + ref ptr, ref range, .. + } => { + if offset < range.start { + return Err(BufferAccessError::OutOfBoundsUnderrun { + index: offset, + min: range.start, + }); + } + if offset + range_size > range.end { + return Err(BufferAccessError::OutOfBoundsOverrun { + index: offset + range_size - 1, + max: range.end, + }); + } + // ptr points to the beginning of the range we mapped in map_async + // rather than the beginning of the buffer. + let relative_offset = (offset - range.start) as isize; + unsafe { Ok((ptr.as_ptr().offset(relative_offset), range_size)) } + } + resource::BufferMapState::Idle | resource::BufferMapState::Waiting(_) => { + Err(BufferAccessError::NotMapped) + } + } + } + pub fn buffer_unmap<A: HalApi>(&self, buffer_id: id::BufferId) -> BufferAccessResult { + profiling::scope!("unmap", "Buffer"); + api_log!("Buffer::unmap {buffer_id:?}"); + + let hub = A::hub(self); + + let buffer = hub + .buffers + .get(buffer_id) + .map_err(|_| BufferAccessError::Invalid)?; + + let snatch_guard = buffer.device.snatchable_lock.read(); + if buffer.is_destroyed(&snatch_guard) { + return Err(BufferAccessError::Destroyed); + } + drop(snatch_guard); + + if !buffer.device.is_valid() { + return Err(DeviceError::Lost.into()); + } + + buffer.unmap() + } +} diff --git a/third_party/rust/wgpu-core/src/device/life.rs b/third_party/rust/wgpu-core/src/device/life.rs new file mode 100644 index 0000000000..86c5d027c7 --- /dev/null +++ b/third_party/rust/wgpu-core/src/device/life.rs @@ -0,0 +1,831 @@ +use crate::{ + binding_model::{BindGroup, BindGroupLayout, PipelineLayout}, + command::RenderBundle, + device::{ + queue::{EncoderInFlight, 
SubmittedWorkDoneClosure, TempResource}, + DeviceError, DeviceLostClosure, + }, + hal_api::HalApi, + id::{ + self, BindGroupId, BindGroupLayoutId, BufferId, ComputePipelineId, Id, PipelineLayoutId, + QuerySetId, RenderBundleId, RenderPipelineId, SamplerId, StagingBufferId, TextureId, + TextureViewId, + }, + pipeline::{ComputePipeline, RenderPipeline}, + resource::{ + self, Buffer, DestroyedBuffer, DestroyedTexture, QuerySet, Resource, Sampler, + StagingBuffer, Texture, TextureView, + }, + track::{ResourceTracker, Tracker}, + FastHashMap, SubmissionIndex, +}; +use smallvec::SmallVec; + +use parking_lot::Mutex; +use std::sync::Arc; +use thiserror::Error; + +/// A struct that keeps lists of resources that are no longer needed by the user. +#[derive(Default)] +pub(crate) struct ResourceMaps<A: HalApi> { + pub buffers: FastHashMap<BufferId, Arc<Buffer<A>>>, + pub staging_buffers: FastHashMap<StagingBufferId, Arc<StagingBuffer<A>>>, + pub textures: FastHashMap<TextureId, Arc<Texture<A>>>, + pub texture_views: FastHashMap<TextureViewId, Arc<TextureView<A>>>, + pub samplers: FastHashMap<SamplerId, Arc<Sampler<A>>>, + pub bind_groups: FastHashMap<BindGroupId, Arc<BindGroup<A>>>, + pub bind_group_layouts: FastHashMap<BindGroupLayoutId, Arc<BindGroupLayout<A>>>, + pub render_pipelines: FastHashMap<RenderPipelineId, Arc<RenderPipeline<A>>>, + pub compute_pipelines: FastHashMap<ComputePipelineId, Arc<ComputePipeline<A>>>, + pub pipeline_layouts: FastHashMap<PipelineLayoutId, Arc<PipelineLayout<A>>>, + pub render_bundles: FastHashMap<RenderBundleId, Arc<RenderBundle<A>>>, + pub query_sets: FastHashMap<QuerySetId, Arc<QuerySet<A>>>, + pub destroyed_buffers: FastHashMap<BufferId, Arc<DestroyedBuffer<A>>>, + pub destroyed_textures: FastHashMap<TextureId, Arc<DestroyedTexture<A>>>, +} + +impl<A: HalApi> ResourceMaps<A> { + pub(crate) fn new() -> Self { + ResourceMaps { + buffers: FastHashMap::default(), + staging_buffers: FastHashMap::default(), + textures: FastHashMap::default(), + 
texture_views: FastHashMap::default(), + samplers: FastHashMap::default(), + bind_groups: FastHashMap::default(), + bind_group_layouts: FastHashMap::default(), + render_pipelines: FastHashMap::default(), + compute_pipelines: FastHashMap::default(), + pipeline_layouts: FastHashMap::default(), + render_bundles: FastHashMap::default(), + query_sets: FastHashMap::default(), + destroyed_buffers: FastHashMap::default(), + destroyed_textures: FastHashMap::default(), + } + } + + pub(crate) fn clear(&mut self) { + let ResourceMaps { + buffers, + staging_buffers, + textures, + texture_views, + samplers, + bind_groups, + bind_group_layouts, + render_pipelines, + compute_pipelines, + pipeline_layouts, + render_bundles, + query_sets, + destroyed_buffers, + destroyed_textures, + } = self; + buffers.clear(); + staging_buffers.clear(); + textures.clear(); + texture_views.clear(); + samplers.clear(); + bind_groups.clear(); + bind_group_layouts.clear(); + render_pipelines.clear(); + compute_pipelines.clear(); + pipeline_layouts.clear(); + render_bundles.clear(); + query_sets.clear(); + destroyed_buffers.clear(); + destroyed_textures.clear(); + } + + pub(crate) fn extend(&mut self, mut other: Self) { + let ResourceMaps { + buffers, + staging_buffers, + textures, + texture_views, + samplers, + bind_groups, + bind_group_layouts, + render_pipelines, + compute_pipelines, + pipeline_layouts, + render_bundles, + query_sets, + destroyed_buffers, + destroyed_textures, + } = self; + buffers.extend(other.buffers.drain()); + staging_buffers.extend(other.staging_buffers.drain()); + textures.extend(other.textures.drain()); + texture_views.extend(other.texture_views.drain()); + samplers.extend(other.samplers.drain()); + bind_groups.extend(other.bind_groups.drain()); + bind_group_layouts.extend(other.bind_group_layouts.drain()); + render_pipelines.extend(other.render_pipelines.drain()); + compute_pipelines.extend(other.compute_pipelines.drain()); + 
pipeline_layouts.extend(other.pipeline_layouts.drain()); + render_bundles.extend(other.render_bundles.drain()); + query_sets.extend(other.query_sets.drain()); + destroyed_buffers.extend(other.destroyed_buffers.drain()); + destroyed_textures.extend(other.destroyed_textures.drain()); + } +} + +/// Resources used by a queue submission, and work to be done once it completes. +struct ActiveSubmission<A: HalApi> { + /// The index of the submission we track. + /// + /// When `Device::fence`'s value is greater than or equal to this, our queue + /// submission has completed. + index: SubmissionIndex, + + /// Resources to be freed once this queue submission has completed. + /// + /// When the device is polled, for completed submissions, + /// `triage_submissions` removes resources that don't need to be held alive any longer + /// from there. + /// + /// This includes things like temporary resources and resources that are + /// used by submitted commands but have been dropped by the user (meaning that + /// this submission is their last reference.) + last_resources: ResourceMaps<A>, + + /// Buffers to be mapped once this submission has completed. + mapped: Vec<Arc<Buffer<A>>>, + + encoders: Vec<EncoderInFlight<A>>, + + /// List of queue "on_submitted_work_done" closures to be called once this + /// submission has completed. + work_done_closures: SmallVec<[SubmittedWorkDoneClosure; 1]>, +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum WaitIdleError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("Tried to wait using a submission index from the wrong device. Submission index is from device {0:?}. Called poll on device {1:?}.")] + WrongSubmissionIndex(id::QueueId, id::DeviceId), + #[error("GPU got stuck :(")] + StuckGpu, +} + +/// Resource tracking for a device. +/// +/// ## Host mapping buffers +/// +/// A buffer cannot be mapped until all active queue submissions that use it +/// have completed. 
To that end: +/// +/// - Each buffer's `ResourceInfo::submission_index` records the index of the +/// most recent queue submission that uses that buffer. +/// +/// - Calling `Global::buffer_map_async` adds the buffer to +/// `self.mapped`, and changes `Buffer::map_state` to prevent it +/// from being used in any new submissions. +/// +/// - When the device is polled, the following `LifetimeTracker` methods decide +/// what should happen next: +/// +/// 1) `triage_mapped` drains `self.mapped`, checking the submission index +/// of each buffer against the queue submissions that have finished +/// execution. Buffers used by submissions still in flight go in +/// `self.active[index].mapped`, and the rest go into +/// `self.ready_to_map`. +/// +/// 2) `triage_submissions` moves entries in `self.active[i]` for completed +/// submissions to `self.ready_to_map`. At this point, both +/// `self.active` and `self.ready_to_map` are up to date with the given +/// submission index. +/// +/// 3) `handle_mapping` drains `self.ready_to_map` and actually maps the +/// buffers, collecting a list of notification closures to call. But any +/// buffers that were dropped by the user get moved to +/// `self.free_resources`. +/// +/// Only calling `Global::buffer_map_async` clones a new `Arc` for the +/// buffer. This new `Arc` is only dropped by `handle_mapping`. +pub(crate) struct LifetimeTracker<A: HalApi> { + /// Resources that the user has requested be mapped, but which are used by + /// queue submissions still in flight. + mapped: Vec<Arc<Buffer<A>>>, + + /// Buffers can be used in a submission that is yet to be made, by the + /// means of `write_buffer()`, so we have a special place for them. + pub future_suspected_buffers: Vec<Arc<Buffer<A>>>, + + /// Textures can be used in the upcoming submission by `write_texture`. + pub future_suspected_textures: Vec<Arc<Texture<A>>>, + + /// Resources whose user handle has died (i.e. 
drop/destroy has been called) + /// and will likely be ready for destruction soon. + pub suspected_resources: ResourceMaps<A>, + + /// Resources used by queue submissions still in flight. One entry per + /// submission, with older submissions appearing before younger. + /// + /// Entries are added by `track_submission` and drained by + /// `LifetimeTracker::triage_submissions`. Lots of methods contribute data + /// to particular entries. + active: Vec<ActiveSubmission<A>>, + + /// Buffers the user has asked us to map, and which are not used by any + /// queue submission still in flight. + ready_to_map: Vec<Arc<Buffer<A>>>, + + /// Queue "on_submitted_work_done" closures that were initiated for while there is no + /// currently pending submissions. These cannot be immediately invoked as they + /// must happen _after_ all mapped buffer callbacks are mapped, so we defer them + /// here until the next time the device is maintained. + work_done_closures: SmallVec<[SubmittedWorkDoneClosure; 1]>, + + /// Closure to be called on "lose the device". This is invoked directly by + /// device.lose or by the UserCallbacks returned from maintain when the device + /// has been destroyed and its queues are empty. + pub device_lost_closure: Option<DeviceLostClosure>, +} + +impl<A: HalApi> LifetimeTracker<A> { + pub fn new() -> Self { + Self { + mapped: Vec::new(), + future_suspected_buffers: Vec::new(), + future_suspected_textures: Vec::new(), + suspected_resources: ResourceMaps::new(), + active: Vec::new(), + ready_to_map: Vec::new(), + work_done_closures: SmallVec::new(), + device_lost_closure: None, + } + } + + /// Return true if there are no queue submissions still in flight. + pub fn queue_empty(&self) -> bool { + self.active.is_empty() + } + + /// Start tracking resources associated with a new queue submission. 
+ pub fn track_submission( + &mut self, + index: SubmissionIndex, + temp_resources: impl Iterator<Item = TempResource<A>>, + encoders: Vec<EncoderInFlight<A>>, + ) { + let mut last_resources = ResourceMaps::new(); + for res in temp_resources { + match res { + TempResource::Buffer(raw) => { + last_resources.buffers.insert(raw.as_info().id(), raw); + } + TempResource::StagingBuffer(raw) => { + last_resources + .staging_buffers + .insert(raw.as_info().id(), raw); + } + TempResource::DestroyedBuffer(destroyed) => { + last_resources + .destroyed_buffers + .insert(destroyed.id, destroyed); + } + TempResource::Texture(raw) => { + last_resources.textures.insert(raw.as_info().id(), raw); + } + TempResource::DestroyedTexture(destroyed) => { + last_resources + .destroyed_textures + .insert(destroyed.id, destroyed); + } + } + } + + self.active.push(ActiveSubmission { + index, + last_resources, + mapped: Vec::new(), + encoders, + work_done_closures: SmallVec::new(), + }); + } + + pub fn post_submit(&mut self) { + for v in self.future_suspected_buffers.drain(..).take(1) { + self.suspected_resources.buffers.insert(v.as_info().id(), v); + } + for v in self.future_suspected_textures.drain(..).take(1) { + self.suspected_resources + .textures + .insert(v.as_info().id(), v); + } + } + + pub(crate) fn map(&mut self, value: &Arc<Buffer<A>>) { + self.mapped.push(value.clone()); + } + + /// Sort out the consequences of completed submissions. + /// + /// Assume that all submissions up through `last_done` have completed. + /// + /// - Buffers used by those submissions are now ready to map, if + /// requested. Add any buffers in the submission's [`mapped`] list to + /// [`self.ready_to_map`], where [`LifetimeTracker::handle_mapping`] will find + /// them. + /// + /// - Resources whose final use was in those submissions are now ready to + /// free. 
Add any resources in the submission's [`last_resources`] table + /// to [`self.free_resources`], where [`LifetimeTracker::cleanup`] will find + /// them. + /// + /// Return a list of [`SubmittedWorkDoneClosure`]s to run. + /// + /// [`mapped`]: ActiveSubmission::mapped + /// [`self.ready_to_map`]: LifetimeTracker::ready_to_map + /// [`last_resources`]: ActiveSubmission::last_resources + /// [`self.free_resources`]: LifetimeTracker::free_resources + /// [`SubmittedWorkDoneClosure`]: crate::device::queue::SubmittedWorkDoneClosure + #[must_use] + pub fn triage_submissions( + &mut self, + last_done: SubmissionIndex, + command_allocator: &mut super::CommandAllocator<A>, + ) -> SmallVec<[SubmittedWorkDoneClosure; 1]> { + profiling::scope!("triage_submissions"); + + //TODO: enable when `is_sorted_by_key` is stable + //debug_assert!(self.active.is_sorted_by_key(|a| a.index)); + let done_count = self + .active + .iter() + .position(|a| a.index > last_done) + .unwrap_or(self.active.len()); + + let mut work_done_closures: SmallVec<_> = self.work_done_closures.drain(..).collect(); + for a in self.active.drain(..done_count) { + log::debug!("Active submission {} is done", a.index); + self.ready_to_map.extend(a.mapped); + for encoder in a.encoders { + let raw = unsafe { encoder.land() }; + command_allocator.release_encoder(raw); + } + work_done_closures.extend(a.work_done_closures); + } + work_done_closures + } + + pub fn schedule_resource_destruction( + &mut self, + temp_resource: TempResource<A>, + last_submit_index: SubmissionIndex, + ) { + let resources = self + .active + .iter_mut() + .find(|a| a.index == last_submit_index) + .map(|a| &mut a.last_resources); + if let Some(resources) = resources { + match temp_resource { + TempResource::Buffer(raw) => { + resources.buffers.insert(raw.as_info().id(), raw); + } + TempResource::StagingBuffer(raw) => { + resources.staging_buffers.insert(raw.as_info().id(), raw); + } + TempResource::DestroyedBuffer(destroyed) => { + 
resources.destroyed_buffers.insert(destroyed.id, destroyed); + } + TempResource::Texture(raw) => { + resources.textures.insert(raw.as_info().id(), raw); + } + TempResource::DestroyedTexture(destroyed) => { + resources.destroyed_textures.insert(destroyed.id, destroyed); + } + } + } + } + + pub fn add_work_done_closure(&mut self, closure: SubmittedWorkDoneClosure) { + match self.active.last_mut() { + Some(active) => { + active.work_done_closures.push(closure); + } + // We must defer the closure until all previously occurring map_async closures + // have fired. This is required by the spec. + None => { + self.work_done_closures.push(closure); + } + } + } +} + +impl<A: HalApi> LifetimeTracker<A> { + fn triage_resources<R>( + resources_map: &mut FastHashMap<Id<R::Marker>, Arc<R>>, + active: &mut [ActiveSubmission<A>], + trackers: &mut impl ResourceTracker<R>, + get_resource_map: impl Fn(&mut ResourceMaps<A>) -> &mut FastHashMap<Id<R::Marker>, Arc<R>>, + ) -> Vec<Arc<R>> + where + R: Resource, + { + let mut removed_resources = Vec::new(); + resources_map.retain(|&id, resource| { + let submit_index = resource.as_info().submission_index(); + let non_referenced_resources = active + .iter_mut() + .find(|a| a.index == submit_index) + .map(|a| &mut a.last_resources); + + let is_removed = trackers.remove_abandoned(id); + if is_removed { + removed_resources.push(resource.clone()); + if let Some(resources) = non_referenced_resources { + get_resource_map(resources).insert(id, resource.clone()); + } + } + !is_removed + }); + removed_resources + } + + fn triage_suspected_render_bundles(&mut self, trackers: &Mutex<Tracker<A>>) -> &mut Self { + let mut trackers = trackers.lock(); + let resource_map = &mut self.suspected_resources.render_bundles; + let mut removed_resources = Self::triage_resources( + resource_map, + self.active.as_mut_slice(), + &mut trackers.bundles, + |maps| &mut maps.render_bundles, + ); + removed_resources.drain(..).for_each(|bundle| { + for v in 
bundle.used.buffers.write().drain_resources() { + self.suspected_resources.buffers.insert(v.as_info().id(), v); + } + for v in bundle.used.textures.write().drain_resources() { + self.suspected_resources + .textures + .insert(v.as_info().id(), v); + } + for v in bundle.used.bind_groups.write().drain_resources() { + self.suspected_resources + .bind_groups + .insert(v.as_info().id(), v); + } + for v in bundle.used.render_pipelines.write().drain_resources() { + self.suspected_resources + .render_pipelines + .insert(v.as_info().id(), v); + } + for v in bundle.used.query_sets.write().drain_resources() { + self.suspected_resources + .query_sets + .insert(v.as_info().id(), v); + } + }); + self + } + + fn triage_suspected_bind_groups(&mut self, trackers: &Mutex<Tracker<A>>) -> &mut Self { + let mut trackers = trackers.lock(); + let resource_map = &mut self.suspected_resources.bind_groups; + let mut removed_resource = Self::triage_resources( + resource_map, + self.active.as_mut_slice(), + &mut trackers.bind_groups, + |maps| &mut maps.bind_groups, + ); + removed_resource.drain(..).for_each(|bind_group| { + for v in bind_group.used.buffers.drain_resources() { + self.suspected_resources.buffers.insert(v.as_info().id(), v); + } + for v in bind_group.used.textures.drain_resources() { + self.suspected_resources + .textures + .insert(v.as_info().id(), v); + } + for v in bind_group.used.views.drain_resources() { + self.suspected_resources + .texture_views + .insert(v.as_info().id(), v); + } + for v in bind_group.used.samplers.drain_resources() { + self.suspected_resources + .samplers + .insert(v.as_info().id(), v); + } + + self.suspected_resources + .bind_group_layouts + .insert(bind_group.layout.as_info().id(), bind_group.layout.clone()); + }); + self + } + + fn triage_suspected_texture_views(&mut self, trackers: &Mutex<Tracker<A>>) -> &mut Self { + let mut trackers = trackers.lock(); + let resource_map = &mut self.suspected_resources.texture_views; + Self::triage_resources( + 
resource_map, + self.active.as_mut_slice(), + &mut trackers.views, + |maps| &mut maps.texture_views, + ); + self + } + + fn triage_suspected_textures(&mut self, trackers: &Mutex<Tracker<A>>) -> &mut Self { + let mut trackers = trackers.lock(); + let resource_map = &mut self.suspected_resources.textures; + Self::triage_resources( + resource_map, + self.active.as_mut_slice(), + &mut trackers.textures, + |maps| &mut maps.textures, + ); + self + } + + fn triage_suspected_samplers(&mut self, trackers: &Mutex<Tracker<A>>) -> &mut Self { + let mut trackers = trackers.lock(); + let resource_map = &mut self.suspected_resources.samplers; + Self::triage_resources( + resource_map, + self.active.as_mut_slice(), + &mut trackers.samplers, + |maps| &mut maps.samplers, + ); + self + } + + fn triage_suspected_buffers(&mut self, trackers: &Mutex<Tracker<A>>) -> &mut Self { + let mut trackers = trackers.lock(); + let resource_map = &mut self.suspected_resources.buffers; + Self::triage_resources( + resource_map, + self.active.as_mut_slice(), + &mut trackers.buffers, + |maps| &mut maps.buffers, + ); + + self + } + + fn triage_suspected_destroyed_buffers(&mut self) { + for (id, buffer) in self.suspected_resources.destroyed_buffers.drain() { + let submit_index = buffer.submission_index; + if let Some(resources) = self.active.iter_mut().find(|a| a.index == submit_index) { + resources + .last_resources + .destroyed_buffers + .insert(id, buffer); + } + } + } + + fn triage_suspected_destroyed_textures(&mut self) { + for (id, texture) in self.suspected_resources.destroyed_textures.drain() { + let submit_index = texture.submission_index; + if let Some(resources) = self.active.iter_mut().find(|a| a.index == submit_index) { + resources + .last_resources + .destroyed_textures + .insert(id, texture); + } + } + } + + fn triage_suspected_compute_pipelines(&mut self, trackers: &Mutex<Tracker<A>>) -> &mut Self { + let mut trackers = trackers.lock(); + let resource_map = &mut 
self.suspected_resources.compute_pipelines; + let mut removed_resources = Self::triage_resources( + resource_map, + self.active.as_mut_slice(), + &mut trackers.compute_pipelines, + |maps| &mut maps.compute_pipelines, + ); + removed_resources.drain(..).for_each(|compute_pipeline| { + self.suspected_resources.pipeline_layouts.insert( + compute_pipeline.layout.as_info().id(), + compute_pipeline.layout.clone(), + ); + }); + self + } + + fn triage_suspected_render_pipelines(&mut self, trackers: &Mutex<Tracker<A>>) -> &mut Self { + let mut trackers = trackers.lock(); + let resource_map = &mut self.suspected_resources.render_pipelines; + let mut removed_resources = Self::triage_resources( + resource_map, + self.active.as_mut_slice(), + &mut trackers.render_pipelines, + |maps| &mut maps.render_pipelines, + ); + removed_resources.drain(..).for_each(|render_pipeline| { + self.suspected_resources.pipeline_layouts.insert( + render_pipeline.layout.as_info().id(), + render_pipeline.layout.clone(), + ); + }); + self + } + + fn triage_suspected_pipeline_layouts(&mut self) -> &mut Self { + let mut removed_resources = Vec::new(); + self.suspected_resources + .pipeline_layouts + .retain(|_pipeline_layout_id, pipeline_layout| { + removed_resources.push(pipeline_layout.clone()); + false + }); + removed_resources.drain(..).for_each(|pipeline_layout| { + for bgl in &pipeline_layout.bind_group_layouts { + self.suspected_resources + .bind_group_layouts + .insert(bgl.as_info().id(), bgl.clone()); + } + }); + self + } + + fn triage_suspected_bind_group_layouts(&mut self) -> &mut Self { + //Note: this has to happen after all the suspected pipelines are destroyed + //Note: nothing else can bump the refcount since the guard is locked exclusively + //Note: same BGL can appear multiple times in the list, but only the last + self.suspected_resources.bind_group_layouts.clear(); + + self + } + + fn triage_suspected_query_sets(&mut self, trackers: &Mutex<Tracker<A>>) -> &mut Self { + let mut trackers 
= trackers.lock(); + let resource_map = &mut self.suspected_resources.query_sets; + Self::triage_resources( + resource_map, + self.active.as_mut_slice(), + &mut trackers.query_sets, + |maps| &mut maps.query_sets, + ); + self + } + + fn triage_suspected_staging_buffers(&mut self) -> &mut Self { + self.suspected_resources.staging_buffers.clear(); + + self + } + + /// Identify resources to free, according to `trackers` and `self.suspected_resources`. + /// + /// Given `trackers`, the [`Tracker`] belonging to same [`Device`] as + /// `self`, and `hub`, the [`Hub`] to which that `Device` belongs: + /// + /// Remove from `trackers` each resource mentioned in + /// [`self.suspected_resources`]. If `trackers` held the final reference to + /// that resource, add it to the appropriate free list, to be destroyed by + /// the hal: + /// + /// - Add resources used by queue submissions still in flight to the + /// [`last_resources`] table of the last such submission's entry in + /// [`self.active`]. When that submission has finished execution. the + /// [`triage_submissions`] method will remove from the tracker and the + /// resource reference count will be responsible carrying out deallocation. + /// + /// ## Entrained resources + /// + /// This function finds resources that are used only by other resources + /// ready to be freed, and adds those to the free lists as well. For + /// example, if there's some texture `T` used only by some texture view + /// `TV`, then if `TV` can be freed, `T` gets added to the free lists too. + /// + /// Since `wgpu-core` resource ownership patterns are acyclic, we can visit + /// each type that can be owned after all types that could possibly own + /// it. This way, we can detect all free-able objects in a single pass, + /// simply by starting with types that are roots of the ownership DAG (like + /// render bundles) and working our way towards leaf types (like buffers). 
+ /// + /// [`Device`]: super::Device + /// [`self.suspected_resources`]: LifetimeTracker::suspected_resources + /// [`last_resources`]: ActiveSubmission::last_resources + /// [`self.active`]: LifetimeTracker::active + /// [`triage_submissions`]: LifetimeTracker::triage_submissions + pub(crate) fn triage_suspected(&mut self, trackers: &Mutex<Tracker<A>>) { + profiling::scope!("triage_suspected"); + + //NOTE: the order is important to release resources that depends between each other! + self.triage_suspected_render_bundles(trackers); + self.triage_suspected_compute_pipelines(trackers); + self.triage_suspected_render_pipelines(trackers); + self.triage_suspected_bind_groups(trackers); + self.triage_suspected_pipeline_layouts(); + self.triage_suspected_bind_group_layouts(); + self.triage_suspected_query_sets(trackers); + self.triage_suspected_samplers(trackers); + self.triage_suspected_staging_buffers(); + self.triage_suspected_texture_views(trackers); + self.triage_suspected_textures(trackers); + self.triage_suspected_buffers(trackers); + self.triage_suspected_destroyed_buffers(); + self.triage_suspected_destroyed_textures(); + } + + /// Determine which buffers are ready to map, and which must wait for the + /// GPU. + /// + /// See the documentation for [`LifetimeTracker`] for details. + pub(crate) fn triage_mapped(&mut self) { + if self.mapped.is_empty() { + return; + } + + for buffer in self.mapped.drain(..) { + let submit_index = buffer.info.submission_index(); + log::trace!( + "Mapping of {:?} at submission {:?} gets assigned to active {:?}", + buffer.info.id(), + submit_index, + self.active.iter().position(|a| a.index == submit_index) + ); + + self.active + .iter_mut() + .find(|a| a.index == submit_index) + .map_or(&mut self.ready_to_map, |a| &mut a.mapped) + .push(buffer); + } + } + + /// Map the buffers in `self.ready_to_map`. + /// + /// Return a list of mapping notifications to send. + /// + /// See the documentation for [`LifetimeTracker`] for details. 
+ #[must_use] + pub(crate) fn handle_mapping( + &mut self, + raw: &A::Device, + trackers: &Mutex<Tracker<A>>, + ) -> Vec<super::BufferMapPendingClosure> { + if self.ready_to_map.is_empty() { + return Vec::new(); + } + let mut pending_callbacks: Vec<super::BufferMapPendingClosure> = + Vec::with_capacity(self.ready_to_map.len()); + + for buffer in self.ready_to_map.drain(..) { + let buffer_id = buffer.info.id(); + let is_removed = { + let mut trackers = trackers.lock(); + trackers.buffers.remove_abandoned(buffer_id) + }; + if is_removed { + *buffer.map_state.lock() = resource::BufferMapState::Idle; + log::trace!("Buffer ready to map {:?} is not tracked anymore", buffer_id); + } else { + let mapping = match std::mem::replace( + &mut *buffer.map_state.lock(), + resource::BufferMapState::Idle, + ) { + resource::BufferMapState::Waiting(pending_mapping) => pending_mapping, + // Mapping cancelled + resource::BufferMapState::Idle => continue, + // Mapping queued at least twice by map -> unmap -> map + // and was already successfully mapped below + active @ resource::BufferMapState::Active { .. 
} => { + *buffer.map_state.lock() = active; + continue; + } + _ => panic!("No pending mapping."), + }; + let status = if mapping.range.start != mapping.range.end { + log::debug!("Buffer {:?} map state -> Active", buffer_id); + let host = mapping.op.host; + let size = mapping.range.end - mapping.range.start; + match super::map_buffer(raw, &buffer, mapping.range.start, size, host) { + Ok(ptr) => { + *buffer.map_state.lock() = resource::BufferMapState::Active { + ptr, + range: mapping.range.start..mapping.range.start + size, + host, + }; + Ok(()) + } + Err(e) => { + log::error!("Mapping failed: {e}"); + Err(e) + } + } + } else { + *buffer.map_state.lock() = resource::BufferMapState::Active { + ptr: std::ptr::NonNull::dangling(), + range: mapping.range, + host: mapping.op.host, + }; + Ok(()) + }; + pending_callbacks.push((mapping.op, status)); + } + } + pending_callbacks + } +} diff --git a/third_party/rust/wgpu-core/src/device/mod.rs b/third_party/rust/wgpu-core/src/device/mod.rs new file mode 100644 index 0000000000..7ecda830a3 --- /dev/null +++ b/third_party/rust/wgpu-core/src/device/mod.rs @@ -0,0 +1,480 @@ +use crate::{ + binding_model, + hal_api::HalApi, + hub::Hub, + id::{BindGroupLayoutId, PipelineLayoutId}, + resource::{Buffer, BufferAccessResult}, + resource::{BufferAccessError, BufferMapOperation}, + resource_log, Label, DOWNLEVEL_ERROR_MESSAGE, +}; + +use arrayvec::ArrayVec; +use hal::Device as _; +use smallvec::SmallVec; +use std::os::raw::c_char; +use thiserror::Error; +use wgt::{BufferAddress, DeviceLostReason, TextureFormat}; + +use std::{iter, num::NonZeroU32, ptr}; + +pub mod any_device; +pub(crate) mod bgl; +pub mod global; +mod life; +pub mod queue; +pub mod resource; +#[cfg(any(feature = "trace", feature = "replay"))] +pub mod trace; +pub use {life::WaitIdleError, resource::Device}; + +pub const SHADER_STAGE_COUNT: usize = hal::MAX_CONCURRENT_SHADER_STAGES; +// Should be large enough for the largest possible texture row. 
This +// value is enough for a 16k texture with float4 format. +pub(crate) const ZERO_BUFFER_SIZE: BufferAddress = 512 << 10; + +const CLEANUP_WAIT_MS: u32 = 5000; + +const IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL: &str = "Implicit BindGroupLayout in the Error State"; +const ENTRYPOINT_FAILURE_ERROR: &str = "The given EntryPoint is Invalid"; + +pub type DeviceDescriptor<'a> = wgt::DeviceDescriptor<Label<'a>>; + +#[repr(C)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum HostMap { + Read, + Write, +} + +#[derive(Clone, Debug, Hash, PartialEq)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +pub(crate) struct AttachmentData<T> { + pub colors: ArrayVec<Option<T>, { hal::MAX_COLOR_ATTACHMENTS }>, + pub resolves: ArrayVec<T, { hal::MAX_COLOR_ATTACHMENTS }>, + pub depth_stencil: Option<T>, +} +impl<T: PartialEq> Eq for AttachmentData<T> {} +impl<T> AttachmentData<T> { + pub(crate) fn map<U, F: Fn(&T) -> U>(&self, fun: F) -> AttachmentData<U> { + AttachmentData { + colors: self.colors.iter().map(|c| c.as_ref().map(&fun)).collect(), + resolves: self.resolves.iter().map(&fun).collect(), + depth_stencil: self.depth_stencil.as_ref().map(&fun), + } + } +} + +#[derive(Debug, Copy, Clone)] +pub enum RenderPassCompatibilityCheckType { + RenderPipeline, + RenderBundle, +} + +#[derive(Clone, Debug, Hash, PartialEq)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +pub(crate) struct RenderPassContext { + pub attachments: AttachmentData<TextureFormat>, + pub sample_count: u32, + pub multiview: Option<NonZeroU32>, +} +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum RenderPassCompatibilityError { + #[error( + "Incompatible color attachments at indices {indices:?}: the RenderPass uses textures with formats {expected:?} but the {ty:?} uses attachments with formats {actual:?}", + )] + IncompatibleColorAttachment { + indices: 
Vec<usize>, + expected: Vec<Option<TextureFormat>>, + actual: Vec<Option<TextureFormat>>, + ty: RenderPassCompatibilityCheckType, + }, + #[error( + "Incompatible depth-stencil attachment format: the RenderPass uses a texture with format {expected:?} but the {ty:?} uses an attachment with format {actual:?}", + )] + IncompatibleDepthStencilAttachment { + expected: Option<TextureFormat>, + actual: Option<TextureFormat>, + ty: RenderPassCompatibilityCheckType, + }, + #[error( + "Incompatible sample count: the RenderPass uses textures with sample count {expected:?} but the {ty:?} uses attachments with format {actual:?}", + )] + IncompatibleSampleCount { + expected: u32, + actual: u32, + ty: RenderPassCompatibilityCheckType, + }, + #[error("Incompatible multiview setting: the RenderPass uses setting {expected:?} but the {ty:?} uses setting {actual:?}")] + IncompatibleMultiview { + expected: Option<NonZeroU32>, + actual: Option<NonZeroU32>, + ty: RenderPassCompatibilityCheckType, + }, +} + +impl RenderPassContext { + // Assumes the renderpass only contains one subpass + pub(crate) fn check_compatible( + &self, + other: &Self, + ty: RenderPassCompatibilityCheckType, + ) -> Result<(), RenderPassCompatibilityError> { + if self.attachments.colors != other.attachments.colors { + let indices = self + .attachments + .colors + .iter() + .zip(&other.attachments.colors) + .enumerate() + .filter_map(|(idx, (left, right))| (left != right).then_some(idx)) + .collect(); + return Err(RenderPassCompatibilityError::IncompatibleColorAttachment { + indices, + expected: self.attachments.colors.iter().cloned().collect(), + actual: other.attachments.colors.iter().cloned().collect(), + ty, + }); + } + if self.attachments.depth_stencil != other.attachments.depth_stencil { + return Err( + RenderPassCompatibilityError::IncompatibleDepthStencilAttachment { + expected: self.attachments.depth_stencil, + actual: other.attachments.depth_stencil, + ty, + }, + ); + } + if self.sample_count != 
other.sample_count { + return Err(RenderPassCompatibilityError::IncompatibleSampleCount { + expected: self.sample_count, + actual: other.sample_count, + ty, + }); + } + if self.multiview != other.multiview { + return Err(RenderPassCompatibilityError::IncompatibleMultiview { + expected: self.multiview, + actual: other.multiview, + ty, + }); + } + Ok(()) + } +} + +pub type BufferMapPendingClosure = (BufferMapOperation, BufferAccessResult); + +#[derive(Default)] +pub struct UserClosures { + pub mappings: Vec<BufferMapPendingClosure>, + pub submissions: SmallVec<[queue::SubmittedWorkDoneClosure; 1]>, + pub device_lost_invocations: SmallVec<[DeviceLostInvocation; 1]>, +} + +impl UserClosures { + fn extend(&mut self, other: Self) { + self.mappings.extend(other.mappings); + self.submissions.extend(other.submissions); + self.device_lost_invocations + .extend(other.device_lost_invocations); + } + + fn fire(self) { + // Note: this logic is specifically moved out of `handle_mapping()` in order to + // have nothing locked by the time we execute users callback code. + + // Mappings _must_ be fired before submissions, as the spec requires all mapping callbacks that are registered before + // a on_submitted_work_done callback to be fired before the on_submitted_work_done callback. 
+ for (mut operation, status) in self.mappings { + if let Some(callback) = operation.callback.take() { + callback.call(status); + } + } + for closure in self.submissions { + closure.call(); + } + for invocation in self.device_lost_invocations { + invocation + .closure + .call(invocation.reason, invocation.message); + } + } +} + +#[cfg(send_sync)] +pub type DeviceLostCallback = Box<dyn Fn(DeviceLostReason, String) + Send + 'static>; +#[cfg(not(send_sync))] +pub type DeviceLostCallback = Box<dyn Fn(DeviceLostReason, String) + 'static>; + +pub struct DeviceLostClosureRust { + pub callback: DeviceLostCallback, + consumed: bool, +} + +impl Drop for DeviceLostClosureRust { + fn drop(&mut self) { + if !self.consumed { + panic!("DeviceLostClosureRust must be consumed before it is dropped."); + } + } +} + +#[repr(C)] +pub struct DeviceLostClosureC { + pub callback: unsafe extern "C" fn(user_data: *mut u8, reason: u8, message: *const c_char), + pub user_data: *mut u8, + consumed: bool, +} + +#[cfg(send_sync)] +unsafe impl Send for DeviceLostClosureC {} + +impl Drop for DeviceLostClosureC { + fn drop(&mut self) { + if !self.consumed { + panic!("DeviceLostClosureC must be consumed before it is dropped."); + } + } +} + +pub struct DeviceLostClosure { + // We wrap this so creating the enum in the C variant can be unsafe, + // allowing our call function to be safe. + inner: DeviceLostClosureInner, +} + +pub struct DeviceLostInvocation { + closure: DeviceLostClosure, + reason: DeviceLostReason, + message: String, +} + +enum DeviceLostClosureInner { + Rust { inner: DeviceLostClosureRust }, + C { inner: DeviceLostClosureC }, +} + +impl DeviceLostClosure { + pub fn from_rust(callback: DeviceLostCallback) -> Self { + let inner = DeviceLostClosureRust { + callback, + consumed: false, + }; + Self { + inner: DeviceLostClosureInner::Rust { inner }, + } + } + + /// # Safety + /// + /// - The callback pointer must be valid to call with the provided `user_data` + /// pointer. 
+ /// + /// - Both pointers must point to `'static` data, as the callback may happen at + /// an unspecified time. + pub unsafe fn from_c(mut closure: DeviceLostClosureC) -> Self { + // Build an inner with the values from closure, ensuring that + // inner.consumed is false. + let inner = DeviceLostClosureC { + callback: closure.callback, + user_data: closure.user_data, + consumed: false, + }; + + // Mark the original closure as consumed, so we can safely drop it. + closure.consumed = true; + + Self { + inner: DeviceLostClosureInner::C { inner }, + } + } + + pub(crate) fn call(self, reason: DeviceLostReason, message: String) { + match self.inner { + DeviceLostClosureInner::Rust { mut inner } => { + inner.consumed = true; + + (inner.callback)(reason, message) + } + // SAFETY: the contract of the call to from_c says that this unsafe is sound. + DeviceLostClosureInner::C { mut inner } => unsafe { + inner.consumed = true; + + // Ensure message is structured as a null-terminated C string. It only + // needs to live as long as the callback invocation. + let message = std::ffi::CString::new(message).unwrap(); + (inner.callback)(inner.user_data, reason as u8, message.as_ptr()) + }, + } + } +} + +fn map_buffer<A: HalApi>( + raw: &A::Device, + buffer: &Buffer<A>, + offset: BufferAddress, + size: BufferAddress, + kind: HostMap, +) -> Result<ptr::NonNull<u8>, BufferAccessError> { + let snatch_guard = buffer.device.snatchable_lock.read(); + let raw_buffer = buffer + .raw(&snatch_guard) + .ok_or(BufferAccessError::Destroyed)?; + let mapping = unsafe { + raw.map_buffer(raw_buffer, offset..offset + size) + .map_err(DeviceError::from)? 
+ }; + + *buffer.sync_mapped_writes.lock() = match kind { + HostMap::Read if !mapping.is_coherent => unsafe { + raw.invalidate_mapped_ranges(raw_buffer, iter::once(offset..offset + size)); + None + }, + HostMap::Write if !mapping.is_coherent => Some(offset..offset + size), + _ => None, + }; + + assert_eq!(offset % wgt::COPY_BUFFER_ALIGNMENT, 0); + assert_eq!(size % wgt::COPY_BUFFER_ALIGNMENT, 0); + // Zero out uninitialized parts of the mapping. (Spec dictates all resources + // behave as if they were initialized with zero) + // + // If this is a read mapping, ideally we would use a `clear_buffer` command + // before reading the data from GPU (i.e. `invalidate_range`). However, this + // would require us to kick off and wait for a command buffer or piggy back + // on an existing one (the later is likely the only worthwhile option). As + // reading uninitialized memory isn't a particular important path to + // support, we instead just initialize the memory here and make sure it is + // GPU visible, so this happens at max only once for every buffer region. + // + // If this is a write mapping zeroing out the memory here is the only + // reasonable way as all data is pushed to GPU anyways. + + // No need to flush if it is flushed later anyways. + let zero_init_needs_flush_now = + mapping.is_coherent && buffer.sync_mapped_writes.lock().is_none(); + let mapped = unsafe { std::slice::from_raw_parts_mut(mapping.ptr.as_ptr(), size as usize) }; + + for uninitialized in buffer + .initialization_status + .write() + .drain(offset..(size + offset)) + { + // The mapping's pointer is already offset, however we track the + // uninitialized range relative to the buffer's start. 
+ let fill_range = + (uninitialized.start - offset) as usize..(uninitialized.end - offset) as usize; + mapped[fill_range].fill(0); + + if zero_init_needs_flush_now { + unsafe { raw.flush_mapped_ranges(raw_buffer, iter::once(uninitialized)) }; + } + } + + Ok(mapping.ptr) +} + +pub(crate) struct CommandAllocator<A: HalApi> { + free_encoders: Vec<A::CommandEncoder>, +} + +impl<A: HalApi> CommandAllocator<A> { + fn acquire_encoder( + &mut self, + device: &A::Device, + queue: &A::Queue, + ) -> Result<A::CommandEncoder, hal::DeviceError> { + match self.free_encoders.pop() { + Some(encoder) => Ok(encoder), + None => unsafe { + let hal_desc = hal::CommandEncoderDescriptor { label: None, queue }; + device.create_command_encoder(&hal_desc) + }, + } + } + + fn release_encoder(&mut self, encoder: A::CommandEncoder) { + self.free_encoders.push(encoder); + } + + fn dispose(self, device: &A::Device) { + resource_log!( + "CommandAllocator::dispose encoders {}", + self.free_encoders.len() + ); + for cmd_encoder in self.free_encoders { + unsafe { + device.destroy_command_encoder(cmd_encoder); + } + } + } +} + +#[derive(Clone, Debug, Error)] +#[error("Device is invalid")] +pub struct InvalidDevice; + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum DeviceError { + #[error("Parent device is invalid.")] + Invalid, + #[error("Parent device is lost")] + Lost, + #[error("Not enough memory left.")] + OutOfMemory, + #[error("Creation of a resource failed for a reason other than running out of memory.")] + ResourceCreationFailed, + #[error("QueueId is invalid")] + InvalidQueueId, + #[error("Attempt to use a resource with a different device from the one that created it")] + WrongDevice, +} + +impl From<hal::DeviceError> for DeviceError { + fn from(error: hal::DeviceError) -> Self { + match error { + hal::DeviceError::Lost => DeviceError::Lost, + hal::DeviceError::OutOfMemory => DeviceError::OutOfMemory, + hal::DeviceError::ResourceCreationFailed => 
DeviceError::ResourceCreationFailed, + } + } +} + +#[derive(Clone, Debug, Error)] +#[error("Features {0:?} are required but not enabled on the device")] +pub struct MissingFeatures(pub wgt::Features); + +#[derive(Clone, Debug, Error)] +#[error( + "Downlevel flags {0:?} are required but not supported on the device.\n{}", + DOWNLEVEL_ERROR_MESSAGE +)] +pub struct MissingDownlevelFlags(pub wgt::DownlevelFlags); + +#[derive(Clone, Debug)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct ImplicitPipelineContext { + pub root_id: PipelineLayoutId, + pub group_ids: ArrayVec<BindGroupLayoutId, { hal::MAX_BIND_GROUPS }>, +} + +pub struct ImplicitPipelineIds<'a> { + pub root_id: Option<PipelineLayoutId>, + pub group_ids: &'a [Option<BindGroupLayoutId>], +} + +impl ImplicitPipelineIds<'_> { + fn prepare<A: HalApi>(self, hub: &Hub<A>) -> ImplicitPipelineContext { + ImplicitPipelineContext { + root_id: hub.pipeline_layouts.prepare(self.root_id).into_id(), + group_ids: self + .group_ids + .iter() + .map(|id_in| hub.bind_group_layouts.prepare(*id_in).into_id()) + .collect(), + } + } +} diff --git a/third_party/rust/wgpu-core/src/device/queue.rs b/third_party/rust/wgpu-core/src/device/queue.rs new file mode 100644 index 0000000000..08c5b767b6 --- /dev/null +++ b/third_party/rust/wgpu-core/src/device/queue.rs @@ -0,0 +1,1569 @@ +#[cfg(feature = "trace")] +use crate::device::trace::Action; +use crate::{ + api_log, + command::{ + extract_texture_selector, validate_linear_texture_data, validate_texture_copy_range, + ClearError, CommandBuffer, CopySide, ImageCopyTexture, TransferError, + }, + conv, + device::{life::ResourceMaps, DeviceError, WaitIdleError}, + get_lowest_common_denom, + global::Global, + hal_api::HalApi, + hal_label, + id::{self, QueueId}, + init_tracker::{has_copy_partial_init_tracker_coverage, TextureInitRange}, + resource::{ + Buffer, BufferAccessError, BufferMapState, DestroyedBuffer, DestroyedTexture, Resource, + 
ResourceInfo, ResourceType, StagingBuffer, Texture, TextureInner, + }, + resource_log, track, FastHashMap, SubmissionIndex, +}; + +use hal::{CommandEncoder as _, Device as _, Queue as _}; +use parking_lot::Mutex; +use smallvec::SmallVec; + +use std::{ + iter, mem, ptr, + sync::{atomic::Ordering, Arc}, +}; +use thiserror::Error; + +use super::Device; + +pub struct Queue<A: HalApi> { + pub device: Option<Arc<Device<A>>>, + pub raw: Option<A::Queue>, + pub info: ResourceInfo<Queue<A>>, +} + +impl<A: HalApi> Resource for Queue<A> { + const TYPE: ResourceType = "Queue"; + + type Marker = crate::id::markers::Queue; + + fn as_info(&self) -> &ResourceInfo<Self> { + &self.info + } + + fn as_info_mut(&mut self) -> &mut ResourceInfo<Self> { + &mut self.info + } +} + +impl<A: HalApi> Drop for Queue<A> { + fn drop(&mut self) { + let queue = self.raw.take().unwrap(); + self.device.as_ref().unwrap().release_queue(queue); + } +} + +/// Number of command buffers that we generate from the same pool +/// for the write_xxx commands, before the pool is recycled. +/// +/// If we don't stop at some point, the pool will grow forever, +/// without a concrete moment of when it can be cleared. +const WRITE_COMMAND_BUFFERS_PER_POOL: usize = 64; + +#[repr(C)] +pub struct SubmittedWorkDoneClosureC { + pub callback: unsafe extern "C" fn(user_data: *mut u8), + pub user_data: *mut u8, +} + +#[cfg(send_sync)] +unsafe impl Send for SubmittedWorkDoneClosureC {} + +pub struct SubmittedWorkDoneClosure { + // We wrap this so creating the enum in the C variant can be unsafe, + // allowing our call function to be safe. 
+ inner: SubmittedWorkDoneClosureInner, +} + +#[cfg(send_sync)] +type SubmittedWorkDoneCallback = Box<dyn FnOnce() + Send + 'static>; +#[cfg(not(send_sync))] +type SubmittedWorkDoneCallback = Box<dyn FnOnce() + 'static>; + +enum SubmittedWorkDoneClosureInner { + Rust { callback: SubmittedWorkDoneCallback }, + C { inner: SubmittedWorkDoneClosureC }, +} + +impl SubmittedWorkDoneClosure { + pub fn from_rust(callback: SubmittedWorkDoneCallback) -> Self { + Self { + inner: SubmittedWorkDoneClosureInner::Rust { callback }, + } + } + + /// # Safety + /// + /// - The callback pointer must be valid to call with the provided `user_data` + /// pointer. + /// + /// - Both pointers must point to `'static` data, as the callback may happen at + /// an unspecified time. + pub unsafe fn from_c(inner: SubmittedWorkDoneClosureC) -> Self { + Self { + inner: SubmittedWorkDoneClosureInner::C { inner }, + } + } + + pub(crate) fn call(self) { + match self.inner { + SubmittedWorkDoneClosureInner::Rust { callback } => callback(), + // SAFETY: the contract of the call to from_c says that this unsafe is sound. + SubmittedWorkDoneClosureInner::C { inner } => unsafe { + (inner.callback)(inner.user_data) + }, + } + } +} + +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct WrappedSubmissionIndex { + pub queue_id: QueueId, + pub index: SubmissionIndex, +} + +/// A texture or buffer to be freed soon. 
+/// +/// This is just a tagged raw texture or buffer, generally about to be added to +/// some other more specific container like: +/// +/// - `PendingWrites::temp_resources`: resources used by queue writes and +/// unmaps, waiting to be folded in with the next queue submission +/// +/// - `ActiveSubmission::last_resources`: temporary resources used by a queue +/// submission, to be freed when it completes +/// +/// - `LifetimeTracker::free_resources`: resources to be freed in the next +/// `maintain` call, no longer used anywhere +#[derive(Debug)] +pub enum TempResource<A: HalApi> { + Buffer(Arc<Buffer<A>>), + StagingBuffer(Arc<StagingBuffer<A>>), + DestroyedBuffer(Arc<DestroyedBuffer<A>>), + DestroyedTexture(Arc<DestroyedTexture<A>>), + Texture(Arc<Texture<A>>), +} + +/// A queue execution for a particular command encoder. +pub(crate) struct EncoderInFlight<A: HalApi> { + raw: A::CommandEncoder, + cmd_buffers: Vec<A::CommandBuffer>, +} + +impl<A: HalApi> EncoderInFlight<A> { + pub(crate) unsafe fn land(mut self) -> A::CommandEncoder { + unsafe { self.raw.reset_all(self.cmd_buffers.into_iter()) }; + self.raw + } +} + +/// A private command encoder for writes made directly on the device +/// or queue. +/// +/// Operations like `buffer_unmap`, `queue_write_buffer`, and +/// `queue_write_texture` need to copy data to the GPU. At the hal +/// level, this must be done by encoding and submitting commands, but +/// these operations are not associated with any specific wgpu command +/// buffer. +/// +/// Instead, `Device::pending_writes` owns one of these values, which +/// has its own hal command encoder and resource lists. The commands +/// accumulated here are automatically submitted to the queue the next +/// time the user submits a wgpu command buffer, ahead of the user's +/// commands. 
+/// +/// Important: +/// When locking pending_writes be sure that tracker is not locked +/// and try to lock trackers for the minimum timespan possible +/// +/// All uses of [`StagingBuffer`]s end up here. +#[derive(Debug)] +pub(crate) struct PendingWrites<A: HalApi> { + pub command_encoder: A::CommandEncoder, + pub is_active: bool, + pub temp_resources: Vec<TempResource<A>>, + pub dst_buffers: FastHashMap<id::BufferId, Arc<Buffer<A>>>, + pub dst_textures: FastHashMap<id::TextureId, Arc<Texture<A>>>, + pub executing_command_buffers: Vec<A::CommandBuffer>, +} + +impl<A: HalApi> PendingWrites<A> { + pub fn new(command_encoder: A::CommandEncoder) -> Self { + Self { + command_encoder, + is_active: false, + temp_resources: Vec::new(), + dst_buffers: FastHashMap::default(), + dst_textures: FastHashMap::default(), + executing_command_buffers: Vec::new(), + } + } + + pub fn dispose(mut self, device: &A::Device) { + unsafe { + if self.is_active { + self.command_encoder.discard_encoding(); + } + self.command_encoder + .reset_all(self.executing_command_buffers.into_iter()); + device.destroy_command_encoder(self.command_encoder); + } + + self.temp_resources.clear(); + } + + pub fn consume_temp(&mut self, resource: TempResource<A>) { + self.temp_resources.push(resource); + } + + fn consume(&mut self, buffer: Arc<StagingBuffer<A>>) { + self.temp_resources + .push(TempResource::StagingBuffer(buffer)); + } + + fn pre_submit(&mut self) -> Result<Option<&A::CommandBuffer>, DeviceError> { + self.dst_buffers.clear(); + self.dst_textures.clear(); + if self.is_active { + let cmd_buf = unsafe { self.command_encoder.end_encoding()? 
}; + self.is_active = false; + self.executing_command_buffers.push(cmd_buf); + + return Ok(self.executing_command_buffers.last()); + } + + Ok(None) + } + + #[must_use] + fn post_submit( + &mut self, + command_allocator: &mut super::CommandAllocator<A>, + device: &A::Device, + queue: &A::Queue, + ) -> Option<EncoderInFlight<A>> { + if self.executing_command_buffers.len() >= WRITE_COMMAND_BUFFERS_PER_POOL { + let new_encoder = command_allocator.acquire_encoder(device, queue).unwrap(); + Some(EncoderInFlight { + raw: mem::replace(&mut self.command_encoder, new_encoder), + cmd_buffers: mem::take(&mut self.executing_command_buffers), + }) + } else { + None + } + } + + pub fn activate(&mut self) -> &mut A::CommandEncoder { + if !self.is_active { + unsafe { + self.command_encoder + .begin_encoding(Some("(wgpu internal) PendingWrites")) + .unwrap(); + } + self.is_active = true; + } + &mut self.command_encoder + } + + pub fn deactivate(&mut self) { + if self.is_active { + unsafe { + self.command_encoder.discard_encoding(); + } + self.is_active = false; + } + } +} + +fn prepare_staging_buffer<A: HalApi>( + device: &Arc<Device<A>>, + size: wgt::BufferAddress, + instance_flags: wgt::InstanceFlags, +) -> Result<(StagingBuffer<A>, *mut u8), DeviceError> { + profiling::scope!("prepare_staging_buffer"); + let stage_desc = hal::BufferDescriptor { + label: hal_label(Some("(wgpu internal) Staging"), instance_flags), + size, + usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::COPY_SRC, + memory_flags: hal::MemoryFlags::TRANSIENT, + }; + + let buffer = unsafe { device.raw().create_buffer(&stage_desc)? 
}; + let mapping = unsafe { device.raw().map_buffer(&buffer, 0..size) }?; + + let staging_buffer = StagingBuffer { + raw: Mutex::new(Some(buffer)), + device: device.clone(), + size, + info: ResourceInfo::new("<StagingBuffer>"), + is_coherent: mapping.is_coherent, + }; + + Ok((staging_buffer, mapping.ptr.as_ptr())) +} + +impl<A: HalApi> StagingBuffer<A> { + unsafe fn flush(&self, device: &A::Device) -> Result<(), DeviceError> { + if !self.is_coherent { + unsafe { + device.flush_mapped_ranges( + self.raw.lock().as_ref().unwrap(), + iter::once(0..self.size), + ) + }; + } + unsafe { device.unmap_buffer(self.raw.lock().as_ref().unwrap())? }; + Ok(()) + } +} + +#[derive(Clone, Debug, Error)] +#[error("Queue is invalid")] +pub struct InvalidQueue; + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum QueueWriteError { + #[error(transparent)] + Queue(#[from] DeviceError), + #[error(transparent)] + Transfer(#[from] TransferError), + #[error(transparent)] + MemoryInitFailure(#[from] ClearError), +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum QueueSubmitError { + #[error(transparent)] + Queue(#[from] DeviceError), + #[error("Buffer {0:?} is destroyed")] + DestroyedBuffer(id::BufferId), + #[error("Texture {0:?} is destroyed")] + DestroyedTexture(id::TextureId), + #[error(transparent)] + Unmap(#[from] BufferAccessError), + #[error("Buffer {0:?} is still mapped")] + BufferStillMapped(id::BufferId), + #[error("Surface output was dropped before the command buffer got submitted")] + SurfaceOutputDropped, + #[error("Surface was unconfigured before the command buffer got submitted")] + SurfaceUnconfigured, + #[error("GPU got stuck :(")] + StuckGpu, +} + +//TODO: move out common parts of write_xxx. 
+ +impl Global { + pub fn queue_write_buffer<A: HalApi>( + &self, + queue_id: QueueId, + buffer_id: id::BufferId, + buffer_offset: wgt::BufferAddress, + data: &[u8], + ) -> Result<(), QueueWriteError> { + profiling::scope!("Queue::write_buffer"); + api_log!("Queue::write_buffer {buffer_id:?} {}bytes", data.len()); + + let hub = A::hub(self); + + let queue = hub + .queues + .get(queue_id) + .map_err(|_| DeviceError::InvalidQueueId)?; + + let device = queue.device.as_ref().unwrap(); + + let data_size = data.len() as wgt::BufferAddress; + + #[cfg(feature = "trace")] + if let Some(ref mut trace) = *device.trace.lock() { + let data_path = trace.make_binary("bin", data); + trace.add(Action::WriteBuffer { + id: buffer_id, + data: data_path, + range: buffer_offset..buffer_offset + data_size, + queued: true, + }); + } + + if data_size == 0 { + log::trace!("Ignoring write_buffer of size 0"); + return Ok(()); + } + + // Platform validation requires that the staging buffer always be + // freed, even if an error occurs. All paths from here must call + // `device.pending_writes.consume`. 
+ let (staging_buffer, staging_buffer_ptr) = + prepare_staging_buffer(device, data_size, device.instance_flags)?; + let mut pending_writes = device.pending_writes.lock(); + let pending_writes = pending_writes.as_mut().unwrap(); + + let stage_fid = hub.staging_buffers.request(); + let staging_buffer = stage_fid.init(staging_buffer); + + if let Err(flush_error) = unsafe { + profiling::scope!("copy"); + ptr::copy_nonoverlapping(data.as_ptr(), staging_buffer_ptr, data.len()); + staging_buffer.flush(device.raw()) + } { + pending_writes.consume(staging_buffer); + return Err(flush_error.into()); + } + + let result = self.queue_write_staging_buffer_impl( + device, + pending_writes, + &staging_buffer, + buffer_id, + buffer_offset, + ); + + pending_writes.consume(staging_buffer); + result + } + + pub fn queue_create_staging_buffer<A: HalApi>( + &self, + queue_id: QueueId, + buffer_size: wgt::BufferSize, + id_in: Option<id::StagingBufferId>, + ) -> Result<(id::StagingBufferId, *mut u8), QueueWriteError> { + profiling::scope!("Queue::create_staging_buffer"); + let hub = A::hub(self); + + let queue = hub + .queues + .get(queue_id) + .map_err(|_| DeviceError::InvalidQueueId)?; + + let device = queue.device.as_ref().unwrap(); + + let (staging_buffer, staging_buffer_ptr) = + prepare_staging_buffer(device, buffer_size.get(), device.instance_flags)?; + + let fid = hub.staging_buffers.prepare(id_in); + let (id, _) = fid.assign(staging_buffer); + resource_log!("Queue::create_staging_buffer {id:?}"); + + Ok((id, staging_buffer_ptr)) + } + + pub fn queue_write_staging_buffer<A: HalApi>( + &self, + queue_id: QueueId, + buffer_id: id::BufferId, + buffer_offset: wgt::BufferAddress, + staging_buffer_id: id::StagingBufferId, + ) -> Result<(), QueueWriteError> { + profiling::scope!("Queue::write_staging_buffer"); + let hub = A::hub(self); + + let queue = hub + .queues + .get(queue_id) + .map_err(|_| DeviceError::InvalidQueueId)?; + + let device = queue.device.as_ref().unwrap(); + + let 
staging_buffer = hub.staging_buffers.unregister(staging_buffer_id); + if staging_buffer.is_none() { + return Err(QueueWriteError::Transfer(TransferError::InvalidBuffer( + buffer_id, + ))); + } + let staging_buffer = staging_buffer.unwrap(); + let mut pending_writes = device.pending_writes.lock(); + let pending_writes = pending_writes.as_mut().unwrap(); + + // At this point, we have taken ownership of the staging_buffer from the + // user. Platform validation requires that the staging buffer always + // be freed, even if an error occurs. All paths from here must call + // `device.pending_writes.consume`. + if let Err(flush_error) = unsafe { staging_buffer.flush(device.raw()) } { + pending_writes.consume(staging_buffer); + return Err(flush_error.into()); + } + + let result = self.queue_write_staging_buffer_impl( + device, + pending_writes, + &staging_buffer, + buffer_id, + buffer_offset, + ); + + pending_writes.consume(staging_buffer); + result + } + + pub fn queue_validate_write_buffer<A: HalApi>( + &self, + _queue_id: QueueId, + buffer_id: id::BufferId, + buffer_offset: u64, + buffer_size: u64, + ) -> Result<(), QueueWriteError> { + profiling::scope!("Queue::validate_write_buffer"); + let hub = A::hub(self); + + let buffer = hub + .buffers + .get(buffer_id) + .map_err(|_| TransferError::InvalidBuffer(buffer_id))?; + + self.queue_validate_write_buffer_impl(&buffer, buffer_id, buffer_offset, buffer_size)?; + + Ok(()) + } + + fn queue_validate_write_buffer_impl<A: HalApi>( + &self, + buffer: &Buffer<A>, + buffer_id: id::BufferId, + buffer_offset: u64, + buffer_size: u64, + ) -> Result<(), TransferError> { + if !buffer.usage.contains(wgt::BufferUsages::COPY_DST) { + return Err(TransferError::MissingCopyDstUsageFlag( + Some(buffer_id), + None, + )); + } + if buffer_size % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(TransferError::UnalignedCopySize(buffer_size)); + } + if buffer_offset % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return 
Err(TransferError::UnalignedBufferOffset(buffer_offset)); + } + if buffer_offset + buffer_size > buffer.size { + return Err(TransferError::BufferOverrun { + start_offset: buffer_offset, + end_offset: buffer_offset + buffer_size, + buffer_size: buffer.size, + side: CopySide::Destination, + }); + } + + Ok(()) + } + + fn queue_write_staging_buffer_impl<A: HalApi>( + &self, + device: &Device<A>, + pending_writes: &mut PendingWrites<A>, + staging_buffer: &StagingBuffer<A>, + buffer_id: id::BufferId, + buffer_offset: u64, + ) -> Result<(), QueueWriteError> { + let hub = A::hub(self); + + let (dst, transition) = { + let buffer_guard = hub.buffers.read(); + let dst = buffer_guard + .get(buffer_id) + .map_err(|_| TransferError::InvalidBuffer(buffer_id))?; + let mut trackers = device.trackers.lock(); + trackers + .buffers + .set_single(dst, hal::BufferUses::COPY_DST) + .ok_or(TransferError::InvalidBuffer(buffer_id))? + }; + let snatch_guard = device.snatchable_lock.read(); + let dst_raw = dst + .raw + .get(&snatch_guard) + .ok_or(TransferError::InvalidBuffer(buffer_id))?; + + if dst.device.as_info().id() != device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + + let src_buffer_size = staging_buffer.size; + self.queue_validate_write_buffer_impl(&dst, buffer_id, buffer_offset, src_buffer_size)?; + + dst.info + .use_at(device.active_submission_index.load(Ordering::Relaxed) + 1); + + let region = wgt::BufferSize::new(src_buffer_size).map(|size| hal::BufferCopy { + src_offset: 0, + dst_offset: buffer_offset, + size, + }); + let inner_buffer = staging_buffer.raw.lock(); + let barriers = iter::once(hal::BufferBarrier { + buffer: inner_buffer.as_ref().unwrap(), + usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, + }) + .chain(transition.map(|pending| pending.into_hal(&dst, &snatch_guard))); + let encoder = pending_writes.activate(); + unsafe { + encoder.transition_buffers(barriers); + encoder.copy_buffer_to_buffer( + inner_buffer.as_ref().unwrap(), 
+ dst_raw, + region.into_iter(), + ); + } + let dst = hub.buffers.get(buffer_id).unwrap(); + pending_writes.dst_buffers.insert(buffer_id, dst.clone()); + + // Ensure the overwritten bytes are marked as initialized so + // they don't need to be nulled prior to mapping or binding. + { + dst.initialization_status + .write() + .drain(buffer_offset..(buffer_offset + src_buffer_size)); + } + + Ok(()) + } + + pub fn queue_write_texture<A: HalApi>( + &self, + queue_id: QueueId, + destination: &ImageCopyTexture, + data: &[u8], + data_layout: &wgt::ImageDataLayout, + size: &wgt::Extent3d, + ) -> Result<(), QueueWriteError> { + profiling::scope!("Queue::write_texture"); + api_log!("Queue::write_texture {:?} {size:?}", destination.texture); + + let hub = A::hub(self); + + let queue = hub + .queues + .get(queue_id) + .map_err(|_| DeviceError::InvalidQueueId)?; + + let device = queue.device.as_ref().unwrap(); + + #[cfg(feature = "trace")] + if let Some(ref mut trace) = *device.trace.lock() { + let data_path = trace.make_binary("bin", data); + trace.add(Action::WriteTexture { + to: *destination, + data: data_path, + layout: *data_layout, + size: *size, + }); + } + + if size.width == 0 || size.height == 0 || size.depth_or_array_layers == 0 { + log::trace!("Ignoring write_texture of size 0"); + return Ok(()); + } + + let dst = hub + .textures + .get(destination.texture) + .map_err(|_| TransferError::InvalidTexture(destination.texture))?; + + if dst.device.as_info().id() != queue_id.transmute() { + return Err(DeviceError::WrongDevice.into()); + } + + if !dst.desc.usage.contains(wgt::TextureUsages::COPY_DST) { + return Err( + TransferError::MissingCopyDstUsageFlag(None, Some(destination.texture)).into(), + ); + } + + // Note: Doing the copy range validation early is important because ensures that the + // dimensions are not going to cause overflow in other parts of the validation. 
+ let (hal_copy_size, array_layer_count) = + validate_texture_copy_range(destination, &dst.desc, CopySide::Destination, size)?; + + let (selector, dst_base) = extract_texture_selector(destination, size, &dst)?; + + if !dst_base.aspect.is_one() { + return Err(TransferError::CopyAspectNotOne.into()); + } + + if !conv::is_valid_copy_dst_texture_format(dst.desc.format, destination.aspect) { + return Err(TransferError::CopyToForbiddenTextureFormat { + format: dst.desc.format, + aspect: destination.aspect, + } + .into()); + } + + // Note: `_source_bytes_per_array_layer` is ignored since we + // have a staging copy, and it can have a different value. + let (_, _source_bytes_per_array_layer) = validate_linear_texture_data( + data_layout, + dst.desc.format, + destination.aspect, + data.len() as wgt::BufferAddress, + CopySide::Source, + size, + false, + )?; + + if dst.desc.format.is_depth_stencil_format() { + device + .require_downlevel_flags(wgt::DownlevelFlags::DEPTH_TEXTURE_AND_BUFFER_COPIES) + .map_err(TransferError::from)?; + } + + let (block_width, block_height) = dst.desc.format.block_dimensions(); + let width_blocks = size.width / block_width; + let height_blocks = size.height / block_height; + + let block_rows_per_image = data_layout.rows_per_image.unwrap_or( + // doesn't really matter because we need this only if we copy + // more than one layer, and then we validate for this being not + // None + height_blocks, + ); + + let block_size = dst + .desc + .format + .block_copy_size(Some(destination.aspect)) + .unwrap(); + let bytes_per_row_alignment = + get_lowest_common_denom(device.alignments.buffer_copy_pitch.get() as u32, block_size); + let stage_bytes_per_row = + wgt::math::align_to(block_size * width_blocks, bytes_per_row_alignment); + + let block_rows_in_copy = + (size.depth_or_array_layers - 1) * block_rows_per_image + height_blocks; + let stage_size = stage_bytes_per_row as u64 * block_rows_in_copy as u64; + + let mut pending_writes = 
device.pending_writes.lock(); + let pending_writes = pending_writes.as_mut().unwrap(); + let encoder = pending_writes.activate(); + + // If the copy does not fully cover the layers, we need to initialize to + // zero *first* as we don't keep track of partial texture layer inits. + // + // Strictly speaking we only need to clear the areas of a layer + // untouched, but this would get increasingly messy. + let init_layer_range = if dst.desc.dimension == wgt::TextureDimension::D3 { + // volume textures don't have a layer range as array volumes aren't supported + 0..1 + } else { + destination.origin.z..destination.origin.z + size.depth_or_array_layers + }; + let mut dst_initialization_status = dst.initialization_status.write(); + if dst_initialization_status.mips[destination.mip_level as usize] + .check(init_layer_range.clone()) + .is_some() + { + if has_copy_partial_init_tracker_coverage(size, destination.mip_level, &dst.desc) { + for layer_range in dst_initialization_status.mips[destination.mip_level as usize] + .drain(init_layer_range) + .collect::<Vec<std::ops::Range<u32>>>() + { + let mut trackers = device.trackers.lock(); + crate::command::clear_texture( + &dst, + TextureInitRange { + mip_range: destination.mip_level..(destination.mip_level + 1), + layer_range, + }, + encoder, + &mut trackers.textures, + &device.alignments, + device.zero_buffer.as_ref().unwrap(), + ) + .map_err(QueueWriteError::from)?; + } + } else { + dst_initialization_status.mips[destination.mip_level as usize] + .drain(init_layer_range); + } + } + + let snatch_guard = device.snatchable_lock.read(); + + // Re-get `dst` immutably here, so that the mutable borrow of the + // `texture_guard.get` above ends in time for the `clear_texture` + // call above. Since we've held `texture_guard` the whole time, we know + // the texture hasn't gone away in the mean time, so we can unwrap. 
+ let dst = hub.textures.get(destination.texture).unwrap(); + dst.info + .use_at(device.active_submission_index.load(Ordering::Relaxed) + 1); + + let dst_raw = dst + .raw(&snatch_guard) + .ok_or(TransferError::InvalidTexture(destination.texture))?; + + let bytes_per_row = data_layout + .bytes_per_row + .unwrap_or(width_blocks * block_size); + + // Platform validation requires that the staging buffer always be + // freed, even if an error occurs. All paths from here must call + // `device.pending_writes.consume`. + let (staging_buffer, staging_buffer_ptr) = + prepare_staging_buffer(device, stage_size, device.instance_flags)?; + + let stage_fid = hub.staging_buffers.request(); + let staging_buffer = stage_fid.init(staging_buffer); + + if stage_bytes_per_row == bytes_per_row { + profiling::scope!("copy aligned"); + // Fast path if the data is already being aligned optimally. + unsafe { + ptr::copy_nonoverlapping( + data.as_ptr().offset(data_layout.offset as isize), + staging_buffer_ptr, + stage_size as usize, + ); + } + } else { + profiling::scope!("copy chunked"); + // Copy row by row into the optimal alignment. 
+ let copy_bytes_per_row = stage_bytes_per_row.min(bytes_per_row) as usize; + for layer in 0..size.depth_or_array_layers { + let rows_offset = layer * block_rows_per_image; + for row in 0..height_blocks { + unsafe { + ptr::copy_nonoverlapping( + data.as_ptr().offset( + data_layout.offset as isize + + (rows_offset + row) as isize * bytes_per_row as isize, + ), + staging_buffer_ptr.offset( + (rows_offset + row) as isize * stage_bytes_per_row as isize, + ), + copy_bytes_per_row, + ); + } + } + } + } + + if let Err(e) = unsafe { staging_buffer.flush(device.raw()) } { + pending_writes.consume(staging_buffer); + return Err(e.into()); + } + + let regions = (0..array_layer_count).map(|rel_array_layer| { + let mut texture_base = dst_base.clone(); + texture_base.array_layer += rel_array_layer; + hal::BufferTextureCopy { + buffer_layout: wgt::ImageDataLayout { + offset: rel_array_layer as u64 + * block_rows_per_image as u64 + * stage_bytes_per_row as u64, + bytes_per_row: Some(stage_bytes_per_row), + rows_per_image: Some(block_rows_per_image), + }, + texture_base, + size: hal_copy_size, + } + }); + + { + let inner_buffer = staging_buffer.raw.lock(); + let barrier = hal::BufferBarrier { + buffer: inner_buffer.as_ref().unwrap(), + usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, + }; + + let mut trackers = device.trackers.lock(); + let transition = trackers + .textures + .set_single(&dst, selector, hal::TextureUses::COPY_DST) + .ok_or(TransferError::InvalidTexture(destination.texture))?; + unsafe { + encoder.transition_textures(transition.map(|pending| pending.into_hal(dst_raw))); + encoder.transition_buffers(iter::once(barrier)); + encoder.copy_buffer_to_texture(inner_buffer.as_ref().unwrap(), dst_raw, regions); + } + } + + pending_writes.consume(staging_buffer); + pending_writes + .dst_textures + .insert(destination.texture, dst.clone()); + + Ok(()) + } + + #[cfg(webgl)] + pub fn queue_copy_external_image_to_texture<A: HalApi>( + &self, + queue_id: QueueId, + 
source: &wgt::ImageCopyExternalImage, + destination: crate::command::ImageCopyTextureTagged, + size: wgt::Extent3d, + ) -> Result<(), QueueWriteError> { + profiling::scope!("Queue::copy_external_image_to_texture"); + + let hub = A::hub(self); + + let queue = hub + .queues + .get(queue_id) + .map_err(|_| DeviceError::InvalidQueueId)?; + + let device = queue.device.as_ref().unwrap(); + + if size.width == 0 || size.height == 0 || size.depth_or_array_layers == 0 { + log::trace!("Ignoring write_texture of size 0"); + return Ok(()); + } + + let mut needs_flag = false; + needs_flag |= matches!(source.source, wgt::ExternalImageSource::OffscreenCanvas(_)); + needs_flag |= source.origin != wgt::Origin2d::ZERO; + needs_flag |= destination.color_space != wgt::PredefinedColorSpace::Srgb; + #[allow(clippy::bool_comparison)] + if matches!(source.source, wgt::ExternalImageSource::ImageBitmap(_)) { + needs_flag |= source.flip_y != false; + needs_flag |= destination.premultiplied_alpha != false; + } + + if needs_flag { + device + .require_downlevel_flags(wgt::DownlevelFlags::UNRESTRICTED_EXTERNAL_TEXTURE_COPIES) + .map_err(TransferError::from)?; + } + + let src_width = source.source.width(); + let src_height = source.source.height(); + + let dst = hub.textures.get(destination.texture).unwrap(); + + if !conv::is_valid_external_image_copy_dst_texture_format(dst.desc.format) { + return Err( + TransferError::ExternalCopyToForbiddenTextureFormat(dst.desc.format).into(), + ); + } + if dst.desc.dimension != wgt::TextureDimension::D2 { + return Err(TransferError::InvalidDimensionExternal(destination.texture).into()); + } + if !dst.desc.usage.contains(wgt::TextureUsages::COPY_DST) { + return Err( + TransferError::MissingCopyDstUsageFlag(None, Some(destination.texture)).into(), + ); + } + if !dst + .desc + .usage + .contains(wgt::TextureUsages::RENDER_ATTACHMENT) + { + return Err( + TransferError::MissingRenderAttachmentUsageFlag(destination.texture).into(), + ); + } + if 
dst.desc.sample_count != 1 { + return Err(TransferError::InvalidSampleCount { + sample_count: dst.desc.sample_count, + } + .into()); + } + + if source.origin.x + size.width > src_width { + return Err(TransferError::TextureOverrun { + start_offset: source.origin.x, + end_offset: source.origin.x + size.width, + texture_size: src_width, + dimension: crate::resource::TextureErrorDimension::X, + side: CopySide::Source, + } + .into()); + } + if source.origin.y + size.height > src_height { + return Err(TransferError::TextureOverrun { + start_offset: source.origin.y, + end_offset: source.origin.y + size.height, + texture_size: src_height, + dimension: crate::resource::TextureErrorDimension::Y, + side: CopySide::Source, + } + .into()); + } + if size.depth_or_array_layers != 1 { + return Err(TransferError::TextureOverrun { + start_offset: 0, + end_offset: size.depth_or_array_layers, + texture_size: 1, + dimension: crate::resource::TextureErrorDimension::Z, + side: CopySide::Source, + } + .into()); + } + + // Note: Doing the copy range validation early is important because ensures that the + // dimensions are not going to cause overflow in other parts of the validation. + let (hal_copy_size, _) = validate_texture_copy_range( + &destination.to_untagged(), + &dst.desc, + CopySide::Destination, + &size, + )?; + + let (selector, dst_base) = + extract_texture_selector(&destination.to_untagged(), &size, &dst)?; + + let mut pending_writes = device.pending_writes.lock(); + let encoder = pending_writes.as_mut().unwrap().activate(); + + // If the copy does not fully cover the layers, we need to initialize to + // zero *first* as we don't keep track of partial texture layer inits. + // + // Strictly speaking we only need to clear the areas of a layer + // untouched, but this would get increasingly messy. 
+ let init_layer_range = if dst.desc.dimension == wgt::TextureDimension::D3 { + // volume textures don't have a layer range as array volumes aren't supported + 0..1 + } else { + destination.origin.z..destination.origin.z + size.depth_or_array_layers + }; + let mut dst_initialization_status = dst.initialization_status.write(); + if dst_initialization_status.mips[destination.mip_level as usize] + .check(init_layer_range.clone()) + .is_some() + { + if has_copy_partial_init_tracker_coverage(&size, destination.mip_level, &dst.desc) { + for layer_range in dst_initialization_status.mips[destination.mip_level as usize] + .drain(init_layer_range) + .collect::<Vec<std::ops::Range<u32>>>() + { + let mut trackers = device.trackers.lock(); + crate::command::clear_texture( + &dst, + TextureInitRange { + mip_range: destination.mip_level..(destination.mip_level + 1), + layer_range, + }, + encoder, + &mut trackers.textures, + &device.alignments, + device.zero_buffer.as_ref().unwrap(), + ) + .map_err(QueueWriteError::from)?; + } + } else { + dst_initialization_status.mips[destination.mip_level as usize] + .drain(init_layer_range); + } + } + dst.info + .use_at(device.active_submission_index.load(Ordering::Relaxed) + 1); + + let snatch_guard = device.snatchable_lock.read(); + let dst_raw = dst + .raw(&snatch_guard) + .ok_or(TransferError::InvalidTexture(destination.texture))?; + + let regions = hal::TextureCopy { + src_base: hal::TextureCopyBase { + mip_level: 0, + array_layer: 0, + origin: source.origin.to_3d(0), + aspect: hal::FormatAspects::COLOR, + }, + dst_base, + size: hal_copy_size, + }; + + unsafe { + let mut trackers = device.trackers.lock(); + let transitions = trackers + .textures + .set_single(&dst, selector, hal::TextureUses::COPY_DST) + .ok_or(TransferError::InvalidTexture(destination.texture))?; + encoder.transition_textures(transitions.map(|pending| pending.into_hal(dst_raw))); + encoder.copy_external_image_to_texture( + source, + dst_raw, + 
destination.premultiplied_alpha,
                iter::once(regions),
            );
        }

        Ok(())
    }

    /// Submit a batch of finished command buffers to the queue.
    ///
    /// For each command buffer this: takes it out of the registry, bumps the
    /// submission index of every tracked resource, collects unique (dropped)
    /// resources into `temp_suspected`, records the resource transitions into
    /// a temporary "Transit" encoder, and transitions used surface textures to
    /// `PRESENT`. It then flushes `pending_writes`, performs the HAL submit
    /// with the device fence, and runs a non-blocking `device.maintain` pass.
    ///
    /// Returns the wrapped submission index on success. Mapped buffers that
    /// are still referenced cause `QueueSubmitError::BufferStillMapped`;
    /// snatched (destroyed) resources cause `DestroyedBuffer`/`DestroyedTexture`.
    pub fn queue_submit<A: HalApi>(
        &self,
        queue_id: QueueId,
        command_buffer_ids: &[id::CommandBufferId],
    ) -> Result<WrappedSubmissionIndex, QueueSubmitError> {
        profiling::scope!("Queue::submit");
        api_log!("Queue::submit {queue_id:?}");

        // Everything that needs locks happens inside this block; the user
        // callbacks collected here are fired only after it ends (see below).
        let (submit_index, callbacks) = {
            let hub = A::hub(self);

            let queue = hub
                .queues
                .get(queue_id)
                .map_err(|_| DeviceError::InvalidQueueId)?;

            let device = queue.device.as_ref().unwrap();

            let mut fence = device.fence.write();
            let fence = fence.as_mut().unwrap();
            // The new submission's index is one past the previous value.
            let submit_index = device
                .active_submission_index
                .fetch_add(1, Ordering::Relaxed)
                + 1;
            let mut active_executions = Vec::new();

            let mut used_surface_textures = track::TextureUsageScope::new();

            let snatch_guard = device.snatchable_lock.read();

            let mut submit_surface_textures_owned = SmallVec::<[_; 2]>::new();

            {
                let mut command_buffer_guard = hub.command_buffers.write();

                if !command_buffer_ids.is_empty() {
                    profiling::scope!("prepare");

                    //TODO: if multiple command buffers are submitted, we can re-use the last
                    // native command buffer of the previous chain instead of always creating
                    // a temporary one, since the chains are not finished.
                    let mut temp_suspected = device.temp_suspected.lock();
                    {
                        let mut suspected = temp_suspected.replace(ResourceMaps::new()).unwrap();
                        suspected.clear();
                    }

                    // finish all the command buffers first
                    for &cmb_id in command_buffer_ids {
                        // we reset the used surface textures every time we use
                        // it, so make sure to set_size on it.
                        used_surface_textures.set_size(hub.textures.read().len());

                        #[allow(unused_mut)]
                        let mut cmdbuf = match command_buffer_guard.replace_with_error(cmb_id) {
                            Ok(cmdbuf) => cmdbuf,
                            Err(_) => continue,
                        };

                        if cmdbuf.device.as_info().id() != queue_id.transmute() {
                            return Err(DeviceError::WrongDevice.into());
                        }

                        #[cfg(feature = "trace")]
                        if let Some(ref mut trace) = *device.trace.lock() {
                            trace.add(Action::Submit(
                                submit_index,
                                cmdbuf
                                    .data
                                    .lock()
                                    .as_mut()
                                    .unwrap()
                                    .commands
                                    .take()
                                    .unwrap(),
                            ));
                        }
                        // Unfinished command buffers are destroyed (if uniquely
                        // owned) instead of submitted.
                        if !cmdbuf.is_finished() {
                            if let Some(cmdbuf) = Arc::into_inner(cmdbuf) {
                                device.destroy_command_buffer(cmdbuf);
                            } else {
                                panic!(
                                    "Command buffer cannot be destroyed because is still in use"
                                );
                            }
                            continue;
                        }

                        // optimize the tracked states
                        // cmdbuf.trackers.optimize();
                        {
                            let cmd_buf_data = cmdbuf.data.lock();
                            let cmd_buf_trackers = &cmd_buf_data.as_ref().unwrap().trackers;

                            // update submission IDs
                            for buffer in cmd_buf_trackers.buffers.used_resources() {
                                let id = buffer.info.id();
                                let raw_buf = match buffer.raw.get(&snatch_guard) {
                                    Some(raw) => raw,
                                    None => {
                                        return Err(QueueSubmitError::DestroyedBuffer(id));
                                    }
                                };
                                buffer.info.use_at(submit_index);
                                if buffer.is_unique() {
                                    // Last reference: unmap if needed, then
                                    // hand the buffer to the suspect list.
                                    if let Some(BufferMapState::Active { .. }) = None::<BufferMapState<A>> {}
                                    if let BufferMapState::Active { .. } = *buffer.map_state.lock()
                                    {
                                        log::warn!("Dropped buffer has a pending mapping.");
                                        unsafe { device.raw().unmap_buffer(raw_buf) }
                                            .map_err(DeviceError::from)?;
                                    }
                                    temp_suspected
                                        .as_mut()
                                        .unwrap()
                                        .buffers
                                        .insert(id, buffer.clone());
                                } else {
                                    match *buffer.map_state.lock() {
                                        BufferMapState::Idle => (),
                                        _ => return Err(QueueSubmitError::BufferStillMapped(id)),
                                    }
                                }
                            }
                            for texture in cmd_buf_trackers.textures.used_resources() {
                                let id = texture.info.id();
                                let should_extend = match texture.inner.get(&snatch_guard) {
                                    None => {
                                        return Err(QueueSubmitError::DestroyedTexture(id));
                                    }
                                    Some(TextureInner::Native { .. }) => false,
                                    Some(TextureInner::Surface {
                                        ref has_work,
                                        ref raw,
                                        ..
                                    }) => {
                                        has_work.store(true, Ordering::Relaxed);

                                        if raw.is_some() {
                                            submit_surface_textures_owned.push(texture.clone());
                                        }

                                        true
                                    }
                                };
                                texture.info.use_at(submit_index);
                                if texture.is_unique() {
                                    temp_suspected
                                        .as_mut()
                                        .unwrap()
                                        .textures
                                        .insert(id, texture.clone());
                                }
                                if should_extend {
                                    unsafe {
                                        used_surface_textures
                                            .merge_single(&texture, None, hal::TextureUses::PRESENT)
                                            .unwrap();
                                    };
                                }
                            }
                            for texture_view in cmd_buf_trackers.views.used_resources() {
                                texture_view.info.use_at(submit_index);
                                if texture_view.is_unique() {
                                    temp_suspected
                                        .as_mut()
                                        .unwrap()
                                        .texture_views
                                        .insert(texture_view.as_info().id(), texture_view.clone());
                                }
                            }
                            {
                                for bg in cmd_buf_trackers.bind_groups.used_resources() {
                                    bg.info.use_at(submit_index);
                                    // We need to update the submission indices for the contained
                                    // state-less (!) resources as well, so that they don't get
                                    // deleted too early if the parent bind group goes out of scope.
                                    for view in bg.used.views.used_resources() {
                                        view.info.use_at(submit_index);
                                    }
                                    for sampler in bg.used.samplers.used_resources() {
                                        sampler.info.use_at(submit_index);
                                    }
                                    if bg.is_unique() {
                                        temp_suspected
                                            .as_mut()
                                            .unwrap()
                                            .bind_groups
                                            .insert(bg.as_info().id(), bg.clone());
                                    }
                                }
                            }
                            // assert!(cmd_buf_trackers.samplers.is_empty());
                            for compute_pipeline in
                                cmd_buf_trackers.compute_pipelines.used_resources()
                            {
                                compute_pipeline.info.use_at(submit_index);
                                if compute_pipeline.is_unique() {
                                    temp_suspected.as_mut().unwrap().compute_pipelines.insert(
                                        compute_pipeline.as_info().id(),
                                        compute_pipeline.clone(),
                                    );
                                }
                            }
                            for render_pipeline in
                                cmd_buf_trackers.render_pipelines.used_resources()
                            {
                                render_pipeline.info.use_at(submit_index);
                                if render_pipeline.is_unique() {
                                    temp_suspected.as_mut().unwrap().render_pipelines.insert(
                                        render_pipeline.as_info().id(),
                                        render_pipeline.clone(),
                                    );
                                }
                            }
                            for query_set in cmd_buf_trackers.query_sets.used_resources() {
                                query_set.info.use_at(submit_index);
                                if query_set.is_unique() {
                                    temp_suspected
                                        .as_mut()
                                        .unwrap()
                                        .query_sets
                                        .insert(query_set.as_info().id(), query_set.clone());
                                }
                            }
                            for bundle in cmd_buf_trackers.bundles.used_resources() {
                                bundle.info.use_at(submit_index);
                                // We need to update the submission indices for the contained
                                // state-less (!) resources as well, excluding the bind groups.
                                // They don't get deleted too early if the bundle goes out of scope.
                                for render_pipeline in
                                    bundle.used.render_pipelines.read().used_resources()
                                {
                                    render_pipeline.info.use_at(submit_index);
                                }
                                for query_set in bundle.used.query_sets.read().used_resources() {
                                    query_set.info.use_at(submit_index);
                                }
                                if bundle.is_unique() {
                                    temp_suspected
                                        .as_mut()
                                        .unwrap()
                                        .render_bundles
                                        .insert(bundle.as_info().id(), bundle.clone());
                                }
                            }
                        }
                        let mut baked = cmdbuf.from_arc_into_baked();
                        // execute resource transitions
                        unsafe {
                            baked
                                .encoder
                                .begin_encoding(hal_label(
                                    Some("(wgpu internal) Transit"),
                                    device.instance_flags,
                                ))
                                .map_err(DeviceError::from)?
                        };
                        log::trace!("Stitching command buffer {:?} before submission", cmb_id);

                        //Note: locking the trackers has to be done after the storages
                        let mut trackers = device.trackers.lock();
                        baked
                            .initialize_buffer_memory(&mut *trackers)
                            .map_err(|err| QueueSubmitError::DestroyedBuffer(err.0))?;
                        baked
                            .initialize_texture_memory(&mut *trackers, device)
                            .map_err(|err| QueueSubmitError::DestroyedTexture(err.0))?;
                        //Note: stateless trackers are not merged:
                        // device already knows these resources exist.
                        CommandBuffer::insert_barriers_from_tracker(
                            &mut baked.encoder,
                            &mut *trackers,
                            &baked.trackers,
                            &snatch_guard,
                        );

                        // The transition encoder is prepended so its barriers
                        // run before the user's recorded commands.
                        let transit = unsafe { baked.encoder.end_encoding().unwrap() };
                        baked.list.insert(0, transit);

                        // Transition surface textures into `Present` state.
                        // Note: we could technically do it after all of the command buffers,
                        // but here we have a command encoder by hand, so it's easier to use it.
                        if !used_surface_textures.is_empty() {
                            unsafe {
                                baked
                                    .encoder
                                    .begin_encoding(hal_label(
                                        Some("(wgpu internal) Present"),
                                        device.instance_flags,
                                    ))
                                    .map_err(DeviceError::from)?
                            };
                            trackers
                                .textures
                                .set_from_usage_scope(&used_surface_textures);
                            let (transitions, textures) =
                                trackers.textures.drain_transitions(&snatch_guard);
                            let texture_barriers = transitions
                                .into_iter()
                                .enumerate()
                                .map(|(i, p)| p.into_hal(textures[i].unwrap().raw().unwrap()));
                            let present = unsafe {
                                baked.encoder.transition_textures(texture_barriers);
                                baked.encoder.end_encoding().unwrap()
                            };
                            baked.list.push(present);
                            used_surface_textures = track::TextureUsageScope::new();
                        }

                        // done
                        active_executions.push(EncoderInFlight {
                            raw: baked.encoder,
                            cmd_buffers: baked.list,
                        });
                    }

                    log::trace!("Device after submission {}", submit_index);
                }
            }

            let mut pending_writes = device.pending_writes.lock();
            let pending_writes = pending_writes.as_mut().unwrap();

            {
                // Surface textures touched only by pending writes also need
                // the PRESENT transition, recorded on the pending encoder.
                used_surface_textures.set_size(hub.textures.read().len());
                for (&id, texture) in pending_writes.dst_textures.iter() {
                    match texture.inner.get(&snatch_guard) {
                        None => {
                            return Err(QueueSubmitError::DestroyedTexture(id));
                        }
                        Some(TextureInner::Native { .. }) => {}
                        Some(TextureInner::Surface {
                            ref has_work,
                            ref raw,
                            ..
                        }) => {
                            has_work.store(true, Ordering::Relaxed);

                            if raw.is_some() {
                                submit_surface_textures_owned.push(texture.clone());
                            }

                            unsafe {
                                used_surface_textures
                                    .merge_single(texture, None, hal::TextureUses::PRESENT)
                                    .unwrap()
                            };
                        }
                    }
                }

                if !used_surface_textures.is_empty() {
                    let mut trackers = device.trackers.lock();

                    trackers
                        .textures
                        .set_from_usage_scope(&used_surface_textures);
                    let (transitions, textures) =
                        trackers.textures.drain_transitions(&snatch_guard);
                    let texture_barriers = transitions
                        .into_iter()
                        .enumerate()
                        .map(|(i, p)| p.into_hal(textures[i].unwrap().raw().unwrap()));
                    unsafe {
                        pending_writes
                            .command_encoder
                            .transition_textures(texture_barriers);
                    };
                }
            }

            // Pending-writes command buffer goes first, then all baked lists.
            let refs = pending_writes
                .pre_submit()?
                .into_iter()
                .chain(
                    active_executions
                        .iter()
                        .flat_map(|pool_execution| pool_execution.cmd_buffers.iter()),
                )
                .collect::<Vec<_>>();

            let mut submit_surface_textures =
                SmallVec::<[_; 2]>::with_capacity(submit_surface_textures_owned.len());

            for texture in &submit_surface_textures_owned {
                submit_surface_textures.extend(match texture.inner.get(&snatch_guard) {
                    Some(TextureInner::Surface { raw, .. }) => raw.as_ref(),
                    _ => None,
                });
            }

            unsafe {
                queue
                    .raw
                    .as_ref()
                    .unwrap()
                    .submit(&refs, &submit_surface_textures, Some((fence, submit_index)))
                    .map_err(DeviceError::from)?;
            }

            profiling::scope!("cleanup");
            if let Some(pending_execution) = pending_writes.post_submit(
                device.command_allocator.lock().as_mut().unwrap(),
                device.raw(),
                queue.raw.as_ref().unwrap(),
            ) {
                active_executions.push(pending_execution);
            }

            // this will register the new submission to the life time tracker
            let mut pending_write_resources = mem::take(&mut pending_writes.temp_resources);
            device.lock_life().track_submission(
                submit_index,
                pending_write_resources.drain(..),
                active_executions,
            );

            // This will schedule destruction of all resources that are no longer needed
            // by the user but used in the command stream, among other things.
            let (closures, _) = match device.maintain(fence, wgt::Maintain::Poll) {
                Ok(closures) => closures,
                Err(WaitIdleError::Device(err)) => return Err(QueueSubmitError::Queue(err)),
                Err(WaitIdleError::StuckGpu) => return Err(QueueSubmitError::StuckGpu),
                Err(WaitIdleError::WrongSubmissionIndex(..)) => unreachable!(),
            };

            // pending_write_resources has been drained, so it's empty, but we
            // want to retain its heap allocation.
            pending_writes.temp_resources = pending_write_resources;
            device.lock_life().post_submit();

            (submit_index, closures)
        };

        // the closures should execute with nothing locked!
        callbacks.fire();

        Ok(WrappedSubmissionIndex {
            queue_id,
            index: submit_index,
        })
    }

    /// Return the queue's timestamp period (via the HAL queue), or
    /// `InvalidQueue` if `queue_id` does not resolve.
    pub fn queue_get_timestamp_period<A: HalApi>(
        &self,
        queue_id: QueueId,
    ) -> Result<f32, InvalidQueue> {
        let hub = A::hub(self);
        match hub.queues.get(queue_id) {
            Ok(queue) => Ok(unsafe { queue.raw.as_ref().unwrap().get_timestamp_period() }),
            Err(_) => Err(InvalidQueue),
        }
    }

    /// Register `closure` to be invoked once previously submitted work
    /// completes, by handing it to the device's lifetime tracker.
    pub fn queue_on_submitted_work_done<A: HalApi>(
        &self,
        queue_id: QueueId,
        closure: SubmittedWorkDoneClosure,
    ) -> Result<(), InvalidQueue> {
        api_log!("Queue::on_submitted_work_done {queue_id:?}");

        //TODO: flush pending writes
        let hub = A::hub(self);
        match hub.queues.get(queue_id) {
            Ok(queue) => queue
                .device
                .as_ref()
                .unwrap()
                .lock_life()
                .add_work_done_closure(closure),
            Err(_) => return Err(InvalidQueue),
        }
        Ok(())
    }
}
diff --git a/third_party/rust/wgpu-core/src/device/resource.rs b/third_party/rust/wgpu-core/src/device/resource.rs
new file mode 100644
index 0000000000..b2c85a056a
--- /dev/null
+++ b/third_party/rust/wgpu-core/src/device/resource.rs
@@ -0,0 +1,3530 @@
#[cfg(feature = "trace")]
use crate::device::trace;
use crate::{
    binding_model::{self, BindGroup, BindGroupLayout, BindGroupLayoutEntryError},
    command, conv,
    device::{
        bgl,
        life::{LifetimeTracker, WaitIdleError},
        queue::PendingWrites,
        AttachmentData, CommandAllocator, DeviceLostInvocation, MissingDownlevelFlags,
        MissingFeatures, RenderPassContext, CLEANUP_WAIT_MS,
    },
    hal_api::HalApi,
    hal_label,
    hub::Hub,
    id::QueueId,
    init_tracker::{
        BufferInitTracker, BufferInitTrackerAction, MemoryInitKind, TextureInitRange,
        TextureInitTracker, TextureInitTrackerAction,
    },
    instance::Adapter,
    pipeline,
    pool::ResourcePool,
    registry::Registry,
    resource::{
        self, Buffer, QuerySet, Resource, ResourceInfo, ResourceType, Sampler, Texture,
        TextureView, TextureViewNotRenderableReason,
    },
    resource_log,
    snatch::{SnatchGuard, SnatchLock,
Snatchable},
    storage::Storage,
    track::{BindGroupStates, TextureSelector, Tracker},
    validation::{self, check_buffer_usage, check_texture_usage},
    FastHashMap, LabelHelpers as _, SubmissionIndex,
};

use arrayvec::ArrayVec;
use hal::{CommandEncoder as _, Device as _};
use parking_lot::{Mutex, MutexGuard, RwLock};

use smallvec::SmallVec;
use thiserror::Error;
use wgt::{DeviceLostReason, TextureFormat, TextureSampleType, TextureViewDimension};

use std::{
    borrow::Cow,
    iter,
    num::NonZeroU32,
    sync::{
        atomic::{AtomicBool, AtomicU64, Ordering},
        Arc, Weak,
    },
};

use super::{
    life::{self, ResourceMaps},
    queue::{self},
    DeviceDescriptor, DeviceError, ImplicitPipelineContext, UserClosures, ENTRYPOINT_FAILURE_ERROR,
    IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL, ZERO_BUFFER_SIZE,
};

/// Structure describing a logical device. Some members are internally mutable,
/// stored behind mutexes.
///
/// TODO: establish clear order of locking for these:
/// `life_tracker`, `trackers`, `render_passes`, `pending_writes`, `trace`.
///
/// Currently, the rules are:
/// 1. `life_tracker` is locked after `hub.devices`, enforced by the type system
/// 1. `self.trackers` is locked last (unenforced)
/// 1. `self.trace` is locked last (unenforced)
///
/// Right now avoid locking twice same resource or registry in a call execution
/// and minimize the locking to the minimum scope possible
/// Unless otherwise specified, no lock may be acquired while holding another lock.
/// This means that you must inspect function calls made while a lock is held
/// to see what locks the callee may try to acquire.
///
/// As far as this point:
/// device_maintain_ids locks Device::lifetime_tracker, and calls...
/// triage_suspected locks Device::trackers, and calls...
/// Registry::unregister locks Registry::storage
///
/// Important:
/// When locking pending_writes please check that trackers is not locked
/// trackers should be locked only when needed for the shortest time possible
pub struct Device<A: HalApi> {
    // `Option` so `Drop` can `take()` and destroy the raw device.
    raw: Option<A::Device>,
    pub(crate) adapter: Arc<Adapter<A>>,
    pub(crate) queue_id: RwLock<Option<QueueId>>,
    // Raw queue parked here by `release_queue`, consumed in `Drop` via `raw.exit`.
    queue_to_drop: RwLock<Option<A::Queue>>,
    // Buffer of zeroes used to clear/initialize textures (see `Device::new`).
    pub(crate) zero_buffer: Option<A::Buffer>,
    pub(crate) info: ResourceInfo<Device<A>>,

    pub(crate) command_allocator: Mutex<Option<CommandAllocator<A>>>,
    //Note: The submission index here corresponds to the last submission that is done.
    pub(crate) active_submission_index: AtomicU64, //SubmissionIndex,
    pub(crate) fence: RwLock<Option<A::Fence>>,
    pub(crate) snatchable_lock: SnatchLock,

    /// Is this device valid? Valid is closely associated with "lose the device",
    /// which can be triggered by various methods, including at the end of device
    /// destroy, and by any GPU errors that cause us to no longer trust the state
    /// of the device. Ideally we would like to fold valid into the storage of
    /// the device itself (for example as an Error enum), but unfortunately we
    /// need to continue to be able to retrieve the device in poll_devices to
    /// determine if it can be dropped. If our internal accesses of devices were
    /// done through ref-counted references and external accesses checked for
    /// Error enums, we wouldn't need this. For now, we need it. All the call
    /// sites where we check it are areas that should be revisited if we start
    /// using ref-counted references for internal access.
    pub(crate) valid: AtomicBool,

    /// All live resources allocated with this [`Device`].
    ///
    /// Has to be locked temporarily only (locked last)
    /// and never before pending_writes
    pub(crate) trackers: Mutex<Tracker<A>>,
    // Life tracker should be locked right after the device and before anything else.
    life_tracker: Mutex<LifetimeTracker<A>>,
    /// Temporary storage for resource management functions. Cleared at the end
    /// of every call (unless an error occurs).
    pub(crate) temp_suspected: Mutex<Option<ResourceMaps<A>>>,
    /// Pool of bind group layouts, allowing deduplication.
    pub(crate) bgl_pool: ResourcePool<bgl::EntryMap, BindGroupLayout<A>>,
    pub(crate) alignments: hal::Alignments,
    pub(crate) limits: wgt::Limits,
    pub(crate) features: wgt::Features,
    pub(crate) downlevel: wgt::DownlevelCapabilities,
    pub(crate) instance_flags: wgt::InstanceFlags,
    pub(crate) pending_writes: Mutex<Option<PendingWrites<A>>>,
    pub(crate) deferred_destroy: Mutex<Vec<DeferredDestroy<A>>>,
    #[cfg(feature = "trace")]
    pub(crate) trace: Mutex<Option<trace::Trace>>,
}

/// Work items processed by `Device::deferred_resource_destruction`; held as
/// weak references so a resource freed in the meantime is simply skipped.
pub(crate) enum DeferredDestroy<A: HalApi> {
    TextureView(Weak<TextureView<A>>),
    BindGroup(Weak<BindGroup<A>>),
}

impl<A: HalApi> std::fmt::Debug for Device<A> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Device")
            .field("adapter", &self.adapter.info.label())
            .field("limits", &self.limits)
            .field("features", &self.features)
            .field("downlevel", &self.downlevel)
            .finish()
    }
}

impl<A: HalApi> Drop for Device<A> {
    /// Tears down all raw HAL objects owned by the device: pending-writes
    /// encoder, command allocator, zero buffer, fence, and finally the queue
    /// handed over by `release_queue` (via `raw.exit`).
    fn drop(&mut self) {
        resource_log!("Destroy raw Device {:?}", self.info.label());
        let raw = self.raw.take().unwrap();
        let pending_writes = self.pending_writes.lock().take().unwrap();
        pending_writes.dispose(&raw);
        self.command_allocator.lock().take().unwrap().dispose(&raw);
        unsafe {
            raw.destroy_buffer(self.zero_buffer.take().unwrap());
            raw.destroy_fence(self.fence.write().take().unwrap());
            let queue = self.queue_to_drop.write().take().unwrap();
            raw.exit(queue);
        }
    }
}

#[derive(Clone, Debug, Error)]
pub enum CreateDeviceError {
    #[error("Not enough memory left to create device")]
    OutOfMemory,
    #[error("Failed to create internal buffer for initializing textures")]
FailedToCreateZeroBuffer(#[from] DeviceError),
}

impl<A: HalApi> Device<A> {
    /// Borrow the raw HAL device. Panics if the device has been dropped
    /// (the slot is `take()`n in `Drop`).
    pub(crate) fn raw(&self) -> &A::Device {
        self.raw.as_ref().unwrap()
    }
    /// `Ok` iff all bits of `feature` were requested at device creation.
    pub(crate) fn require_features(&self, feature: wgt::Features) -> Result<(), MissingFeatures> {
        if self.features.contains(feature) {
            Ok(())
        } else {
            Err(MissingFeatures(feature))
        }
    }

    /// `Ok` iff the adapter's downlevel capabilities include all of `flags`.
    pub(crate) fn require_downlevel_flags(
        &self,
        flags: wgt::DownlevelFlags,
    ) -> Result<(), MissingDownlevelFlags> {
        if self.downlevel.flags.contains(flags) {
            Ok(())
        } else {
            Err(MissingDownlevelFlags(flags))
        }
    }
}

impl<A: HalApi> Device<A> {
    /// Build a `Device` around a raw HAL device/queue pair: creates the fence,
    /// the first pending-writes encoder, and the zero-init buffer (cleared via
    /// the pending encoder), and optionally starts an API trace.
    pub(crate) fn new(
        raw_device: A::Device,
        raw_queue: &A::Queue,
        adapter: &Arc<Adapter<A>>,
        desc: &DeviceDescriptor,
        trace_path: Option<&std::path::Path>,
        instance_flags: wgt::InstanceFlags,
    ) -> Result<Self, CreateDeviceError> {
        #[cfg(not(feature = "trace"))]
        if let Some(_) = trace_path {
            log::error!("Feature 'trace' is not enabled");
        }
        let fence =
            unsafe { raw_device.create_fence() }.map_err(|_| CreateDeviceError::OutOfMemory)?;

        let mut com_alloc = CommandAllocator {
            free_encoders: Vec::new(),
        };
        let pending_encoder = com_alloc
            .acquire_encoder(&raw_device, raw_queue)
            .map_err(|_| CreateDeviceError::OutOfMemory)?;
        let mut pending_writes = queue::PendingWrites::<A>::new(pending_encoder);

        // Create zeroed buffer used for texture clears.
        let zero_buffer = unsafe {
            raw_device
                .create_buffer(&hal::BufferDescriptor {
                    label: hal_label(Some("(wgpu internal) zero init buffer"), instance_flags),
                    size: ZERO_BUFFER_SIZE,
                    usage: hal::BufferUses::COPY_SRC | hal::BufferUses::COPY_DST,
                    memory_flags: hal::MemoryFlags::empty(),
                })
                .map_err(DeviceError::from)?
        };
        pending_writes.activate();
        // Record the one-time clear of the zero buffer on the pending encoder:
        // transition to COPY_DST, clear, then transition to COPY_SRC for use.
        unsafe {
            pending_writes
                .command_encoder
                .transition_buffers(iter::once(hal::BufferBarrier {
                    buffer: &zero_buffer,
                    usage: hal::BufferUses::empty()..hal::BufferUses::COPY_DST,
                }));
            pending_writes
                .command_encoder
                .clear_buffer(&zero_buffer, 0..ZERO_BUFFER_SIZE);
            pending_writes
                .command_encoder
                .transition_buffers(iter::once(hal::BufferBarrier {
                    buffer: &zero_buffer,
                    usage: hal::BufferUses::COPY_DST..hal::BufferUses::COPY_SRC,
                }));
        }

        let alignments = adapter.raw.capabilities.alignments.clone();
        let downlevel = adapter.raw.capabilities.downlevel.clone();

        Ok(Self {
            raw: Some(raw_device),
            adapter: adapter.clone(),
            queue_id: RwLock::new(None),
            queue_to_drop: RwLock::new(None),
            zero_buffer: Some(zero_buffer),
            info: ResourceInfo::new("<device>"),
            command_allocator: Mutex::new(Some(com_alloc)),
            active_submission_index: AtomicU64::new(0),
            fence: RwLock::new(Some(fence)),
            snatchable_lock: unsafe { SnatchLock::new() },
            valid: AtomicBool::new(true),
            trackers: Mutex::new(Tracker::new()),
            life_tracker: Mutex::new(life::LifetimeTracker::new()),
            temp_suspected: Mutex::new(Some(life::ResourceMaps::new())),
            bgl_pool: ResourcePool::new(),
            #[cfg(feature = "trace")]
            trace: Mutex::new(trace_path.and_then(|path| match trace::Trace::new(path) {
                Ok(mut trace) => {
                    trace.add(trace::Action::Init {
                        desc: desc.clone(),
                        backend: A::VARIANT,
                    });
                    Some(trace)
                }
                Err(e) => {
                    log::error!("Unable to start a trace in '{path:?}': {e}");
                    None
                }
            })),
            alignments,
            limits: desc.required_limits.clone(),
            features: desc.required_features,
            downlevel,
            instance_flags,
            pending_writes: Mutex::new(Some(pending_writes)),
            deferred_destroy: Mutex::new(Vec::new()),
        })
    }

    /// Whether the device is still trusted (see the `valid` field docs).
    pub fn is_valid(&self) -> bool {
        self.valid.load(Ordering::Acquire)
    }

    /// Park the raw queue so `Drop` can pass it to `raw.exit`.
    pub(crate) fn release_queue(&self, queue: A::Queue) {
        self.queue_to_drop.write().replace(queue);
    }

    pub(crate) fn
lock_life<'a>(&'a self) -> MutexGuard<'a, LifetimeTracker<A>> {
        self.life_tracker.lock()
    }

    /// Run some destroy operations that were deferred.
    ///
    /// Destroying the resources requires taking a write lock on the device's snatch lock,
    /// so a good reason for deferring resource destruction is when we don't know for sure
    /// how risky it is to take the lock (typically, it shouldn't be taken from the drop
    /// implementation of a reference-counted structure).
    /// The snatch lock must not be held while this function is called.
    pub(crate) fn deferred_resource_destruction(&self) {
        while let Some(item) = self.deferred_destroy.lock().pop() {
            match item {
                DeferredDestroy::TextureView(view) => {
                    // Weak upgrade or snatch may fail if the resource was
                    // already freed elsewhere; both cases are simply skipped.
                    let Some(view) = view.upgrade() else {
                        continue;
                    };
                    let Some(raw_view) = view.raw.snatch(self.snatchable_lock.write()) else {
                        continue;
                    };

                    resource_log!("Destroy raw TextureView (destroyed) {:?}", view.label());
                    #[cfg(feature = "trace")]
                    if let Some(t) = self.trace.lock().as_mut() {
                        t.add(trace::Action::DestroyTextureView(view.info.id()));
                    }
                    unsafe {
                        use hal::Device;
                        self.raw().destroy_texture_view(raw_view);
                    }
                }
                DeferredDestroy::BindGroup(bind_group) => {
                    let Some(bind_group) = bind_group.upgrade() else {
                        continue;
                    };
                    let Some(raw_bind_group) = bind_group.raw.snatch(self.snatchable_lock.write()) else {
                        continue;
                    };

                    resource_log!("Destroy raw BindGroup (destroyed) {:?}", bind_group.label());
                    #[cfg(feature = "trace")]
                    if let Some(t) = self.trace.lock().as_mut() {
                        t.add(trace::Action::DestroyBindGroup(bind_group.info.id()));
                    }
                    unsafe {
                        use hal::Device;
                        self.raw().destroy_bind_group(raw_bind_group);
                    }
                }
            }
        }
    }

    /// Check this device for completed commands.
    ///
    /// The `maintain` argument tells how the maintenance function should behave, either
    /// blocking or just polling the current state of the gpu.
    ///
    /// Return a pair `(closures, queue_empty)`, where:
    ///
    /// - `closures` is a list of actions to take: mapping buffers, notifying the user
    ///
    /// - `queue_empty` is a boolean indicating whether there are more queue
    ///   submissions still in flight. (We have to take the locks needed to
    ///   produce this information for other reasons, so we might as well just
    ///   return it to our callers.)
    pub(crate) fn maintain<'this>(
        &'this self,
        fence: &A::Fence,
        maintain: wgt::Maintain<queue::WrappedSubmissionIndex>,
    ) -> Result<(UserClosures, bool), WaitIdleError> {
        profiling::scope!("Device::maintain");
        let last_done_index = if maintain.is_wait() {
            let index_to_wait_for = match maintain {
                wgt::Maintain::WaitForSubmissionIndex(submission_index) => {
                    // We don't need to check to see if the queue id matches
                    // as we already checked this from inside the poll call.
                    submission_index.index
                }
                _ => self.active_submission_index.load(Ordering::Relaxed),
            };
            unsafe {
                self.raw
                    .as_ref()
                    .unwrap()
                    .wait(fence, index_to_wait_for, CLEANUP_WAIT_MS)
                    .map_err(DeviceError::from)?
            };
            index_to_wait_for
        } else {
            // Non-blocking: just read how far the fence has advanced.
            unsafe {
                self.raw
                    .as_ref()
                    .unwrap()
                    .get_fence_value(fence)
                    .map_err(DeviceError::from)?
            }
        };

        let mut life_tracker = self.lock_life();
        let submission_closures = life_tracker.triage_submissions(
            last_done_index,
            self.command_allocator.lock().as_mut().unwrap(),
        );

        {
            // Normally, `temp_suspected` exists only to save heap
            // allocations: it's cleared at the start of the function
            // call, and cleared by the end. But `Global::queue_submit` is
            // fallible; if it exits early, it may leave some resources in
            // `temp_suspected`.
            let temp_suspected = self
                .temp_suspected
                .lock()
                .replace(ResourceMaps::new())
                .unwrap();

            life_tracker.suspected_resources.extend(temp_suspected);

            life_tracker.triage_suspected(&self.trackers);
            life_tracker.triage_mapped();
        }

        let mapping_closures = life_tracker.handle_mapping(self.raw(), &self.trackers);

        let queue_empty = life_tracker.queue_empty();

        // Detect if we have been destroyed and now need to lose the device.
        // If we are invalid (set at start of destroy) and our queue is empty,
        // and we have a DeviceLostClosure, return the closure to be called by
        // our caller. This will complete the steps for both destroy and for
        // "lose the device".
        let mut device_lost_invocations = SmallVec::new();
        let mut should_release_gpu_resource = false;
        if !self.is_valid() && queue_empty {
            // We can release gpu resources associated with this device (but not
            // while holding the life_tracker lock).
            should_release_gpu_resource = true;

            // If we have a DeviceLostClosure, build an invocation with the
            // reason DeviceLostReason::Destroyed and no message.
            if life_tracker.device_lost_closure.is_some() {
                device_lost_invocations.push(DeviceLostInvocation {
                    closure: life_tracker.device_lost_closure.take().unwrap(),
                    reason: DeviceLostReason::Destroyed,
                    message: String::new(),
                });
            }
        }

        // Don't hold the lock while calling release_gpu_resources.
        drop(life_tracker);

        if should_release_gpu_resource {
            self.release_gpu_resources();
        }

        let closures = UserClosures {
            mappings: mapping_closures,
            submissions: submission_closures,
            device_lost_invocations,
        };
        Ok((closures, queue_empty))
    }

    /// Move every uniquely-owned resource referenced by `trackers` into the
    /// lifetime tracker's suspect list, so the next GC pass can destroy it.
    pub(crate) fn untrack(&self, trackers: &Tracker<A>) {
        let mut temp_suspected = self
            .temp_suspected
            .lock()
            .replace(ResourceMaps::new())
            .unwrap();
        temp_suspected.clear();
        // As the tracker is cleared/dropped, we need to consider all the resources
        // that it references for destruction in the next GC pass.
        {
            for resource in trackers.buffers.used_resources() {
                if resource.is_unique() {
                    temp_suspected
                        .buffers
                        .insert(resource.as_info().id(), resource.clone());
                }
            }
            for resource in trackers.textures.used_resources() {
                if resource.is_unique() {
                    temp_suspected
                        .textures
                        .insert(resource.as_info().id(), resource.clone());
                }
            }
            for resource in trackers.views.used_resources() {
                if resource.is_unique() {
                    temp_suspected
                        .texture_views
                        .insert(resource.as_info().id(), resource.clone());
                }
            }
            for resource in trackers.bind_groups.used_resources() {
                if resource.is_unique() {
                    temp_suspected
                        .bind_groups
                        .insert(resource.as_info().id(), resource.clone());
                }
            }
            for resource in trackers.samplers.used_resources() {
                if resource.is_unique() {
                    temp_suspected
                        .samplers
                        .insert(resource.as_info().id(), resource.clone());
                }
            }
            for resource in trackers.compute_pipelines.used_resources() {
                if resource.is_unique() {
                    temp_suspected
                        .compute_pipelines
                        .insert(resource.as_info().id(), resource.clone());
                }
            }
            for resource in trackers.render_pipelines.used_resources() {
                if resource.is_unique() {
                    temp_suspected
                        .render_pipelines
                        .insert(resource.as_info().id(), resource.clone());
                }
            }
            for resource in trackers.query_sets.used_resources() {
                if resource.is_unique() {
                    temp_suspected
                        .query_sets
.insert(resource.as_info().id(), resource.clone()); + } + } + } + self.lock_life().suspected_resources.extend(temp_suspected); + } + + pub(crate) fn create_buffer( + self: &Arc<Self>, + desc: &resource::BufferDescriptor, + transient: bool, + ) -> Result<Buffer<A>, resource::CreateBufferError> { + debug_assert_eq!(self.as_info().id().backend(), A::VARIANT); + + if desc.size > self.limits.max_buffer_size { + return Err(resource::CreateBufferError::MaxBufferSize { + requested: desc.size, + maximum: self.limits.max_buffer_size, + }); + } + + if desc.usage.contains(wgt::BufferUsages::INDEX) + && desc.usage.contains( + wgt::BufferUsages::VERTEX + | wgt::BufferUsages::UNIFORM + | wgt::BufferUsages::INDIRECT + | wgt::BufferUsages::STORAGE, + ) + { + self.require_downlevel_flags(wgt::DownlevelFlags::UNRESTRICTED_INDEX_BUFFER)?; + } + + let mut usage = conv::map_buffer_usage(desc.usage); + + if desc.usage.is_empty() || desc.usage.contains_invalid_bits() { + return Err(resource::CreateBufferError::InvalidUsage(desc.usage)); + } + + if !self + .features + .contains(wgt::Features::MAPPABLE_PRIMARY_BUFFERS) + { + use wgt::BufferUsages as Bu; + let write_mismatch = desc.usage.contains(Bu::MAP_WRITE) + && !(Bu::MAP_WRITE | Bu::COPY_SRC).contains(desc.usage); + let read_mismatch = desc.usage.contains(Bu::MAP_READ) + && !(Bu::MAP_READ | Bu::COPY_DST).contains(desc.usage); + if write_mismatch || read_mismatch { + return Err(resource::CreateBufferError::UsageMismatch(desc.usage)); + } + } + + if desc.mapped_at_creation { + if desc.size % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(resource::CreateBufferError::UnalignedSize); + } + if !desc.usage.contains(wgt::BufferUsages::MAP_WRITE) { + // we are going to be copying into it, internally + usage |= hal::BufferUses::COPY_DST; + } + } else { + // We are required to zero out (initialize) all memory. This is done + // on demand using clear_buffer which requires write transfer usage! 
+ usage |= hal::BufferUses::COPY_DST; + } + + let actual_size = if desc.size == 0 { + wgt::COPY_BUFFER_ALIGNMENT + } else if desc.usage.contains(wgt::BufferUsages::VERTEX) { + // Bumping the size by 1 so that we can bind an empty range at the + // end of the buffer. + desc.size + 1 + } else { + desc.size + }; + let clear_remainder = actual_size % wgt::COPY_BUFFER_ALIGNMENT; + let aligned_size = if clear_remainder != 0 { + actual_size + wgt::COPY_BUFFER_ALIGNMENT - clear_remainder + } else { + actual_size + }; + + let mut memory_flags = hal::MemoryFlags::empty(); + memory_flags.set(hal::MemoryFlags::TRANSIENT, transient); + + let hal_desc = hal::BufferDescriptor { + label: desc.label.to_hal(self.instance_flags), + size: aligned_size, + usage, + memory_flags, + }; + let buffer = unsafe { self.raw().create_buffer(&hal_desc) }.map_err(DeviceError::from)?; + + Ok(Buffer { + raw: Snatchable::new(buffer), + device: self.clone(), + usage: desc.usage, + size: desc.size, + initialization_status: RwLock::new(BufferInitTracker::new(aligned_size)), + sync_mapped_writes: Mutex::new(None), + map_state: Mutex::new(resource::BufferMapState::Idle), + info: ResourceInfo::new(desc.label.borrow_or_default()), + bind_groups: Mutex::new(Vec::new()), + }) + } + + pub(crate) fn create_texture_from_hal( + self: &Arc<Self>, + hal_texture: A::Texture, + hal_usage: hal::TextureUses, + desc: &resource::TextureDescriptor, + format_features: wgt::TextureFormatFeatures, + clear_mode: resource::TextureClearMode<A>, + ) -> Texture<A> { + debug_assert_eq!(self.as_info().id().backend(), A::VARIANT); + + Texture { + inner: Snatchable::new(resource::TextureInner::Native { raw: hal_texture }), + device: self.clone(), + desc: desc.map_label(|_| ()), + hal_usage, + format_features, + initialization_status: RwLock::new(TextureInitTracker::new( + desc.mip_level_count, + desc.array_layer_count(), + )), + full_range: TextureSelector { + mips: 0..desc.mip_level_count, + layers: 0..desc.array_layer_count(), + }, 
            // Tail of `create_texture_from_hal`: the remaining fields of the
            // `Texture` struct literal (opened earlier, outside this span).
            info: ResourceInfo::new(desc.label.borrow_or_default()),
            clear_mode: RwLock::new(clear_mode),
            views: Mutex::new(Vec::new()),
            bind_groups: Mutex::new(Vec::new()),
        }
    }

    /// Wrap an already-created HAL buffer in a wgpu-core `Buffer`.
    ///
    /// Unlike `create_buffer`, this performs no validation against device
    /// limits, and the initialization tracker is created with size 0, so no
    /// zero-initialization is scheduled for the HAL buffer's contents — the
    /// caller-supplied buffer is taken as-is.
    pub fn create_buffer_from_hal(
        self: &Arc<Self>,
        hal_buffer: A::Buffer,
        desc: &resource::BufferDescriptor,
    ) -> Buffer<A> {
        // The resource id's backend must match the backend `A` we are built for.
        debug_assert_eq!(self.as_info().id().backend(), A::VARIANT);

        Buffer {
            raw: Snatchable::new(hal_buffer),
            device: self.clone(),
            usage: desc.usage,
            size: desc.size,
            // Size-0 tracker: nothing to lazily clear for an external buffer.
            initialization_status: RwLock::new(BufferInitTracker::new(0)),
            sync_mapped_writes: Mutex::new(None),
            map_state: Mutex::new(resource::BufferMapState::Idle),
            info: ResourceInfo::new(desc.label.borrow_or_default()),
            bind_groups: Mutex::new(Vec::new()),
        }
    }

    /// Validate `desc` against device limits and format features, then create
    /// the texture. (Head of the function only; validation and the HAL call
    /// continue past this span.)
    pub(crate) fn create_texture(
        self: &Arc<Self>,
        adapter: &Adapter<A>,
        desc: &resource::TextureDescriptor,
    ) -> Result<Texture<A>, resource::CreateTextureError> {
        use resource::{CreateTextureError, TextureDimensionError};

        // Usage must be non-empty and contain only known bits.
        if desc.usage.is_empty() || desc.usage.contains_invalid_bits() {
            return Err(CreateTextureError::InvalidUsage(desc.usage));
        }

        // Extent / sample-count checks against the device limits.
        conv::check_texture_dimension_size(
            desc.dimension,
            desc.size,
            desc.sample_count,
            &self.limits,
        )?;

        if desc.dimension != wgt::TextureDimension::D2 {
            // Depth textures can only be 2D
            if desc.format.is_depth_stencil_format() {
                return Err(CreateTextureError::InvalidDepthDimension(
                    desc.dimension,
                    desc.format,
                ));
            }
            // Renderable textures can only be 2D
            if desc.usage.contains(wgt::TextureUsages::RENDER_ATTACHMENT) {
                return Err(CreateTextureError::InvalidDimensionUsages(
                    wgt::TextureUsages::RENDER_ATTACHMENT,
                    desc.dimension,
                ));
            }

            // Compressed textures can only be 2D
            if desc.format.is_compressed() {
                return Err(CreateTextureError::InvalidCompressedDimension(
                    desc.dimension,
                    desc.format,
                ));
            }
        }

        if desc.format.is_compressed() {
            let (block_width, block_height) = desc.format.block_dimensions();
            // Compressed formats: the extent must be a whole number of
            // compression blocks in each dimension.
            if desc.size.width % block_width != 0 {
                return Err(CreateTextureError::InvalidDimension(
                    TextureDimensionError::NotMultipleOfBlockWidth {
                        width: desc.size.width,
                        block_width,
                        format: desc.format,
                    },
                ));
            }

            if desc.size.height % block_height != 0 {
                return Err(CreateTextureError::InvalidDimension(
                    TextureDimensionError::NotMultipleOfBlockHeight {
                        height: desc.size.height,
                        block_height,
                        format: desc.format,
                    },
                ));
            }
        }

        {
            // Independently of compression, some formats require width/height
            // to be multiples of a per-format value.
            let (width_multiple, height_multiple) = desc.format.size_multiple_requirement();

            if desc.size.width % width_multiple != 0 {
                return Err(CreateTextureError::InvalidDimension(
                    TextureDimensionError::WidthNotMultipleOf {
                        width: desc.size.width,
                        multiple: width_multiple,
                        format: desc.format,
                    },
                ));
            }

            if desc.size.height % height_multiple != 0 {
                return Err(CreateTextureError::InvalidDimension(
                    TextureDimensionError::HeightNotMultipleOf {
                        height: desc.size.height,
                        multiple: height_multiple,
                        format: desc.format,
                    },
                ));
            }
        }

        // What this adapter/device combination actually supports for `format`.
        let format_features = self
            .describe_format_features(adapter, desc.format)
            .map_err(|error| CreateTextureError::MissingFeatures(desc.format, error))?;

        if desc.sample_count > 1 {
            // Multisampled textures: single mip, single layer, no storage
            // binding, and must be usable as a render attachment.
            if desc.mip_level_count != 1 {
                return Err(CreateTextureError::InvalidMipLevelCount {
                    requested: desc.mip_level_count,
                    maximum: 1,
                });
            }

            if desc.size.depth_or_array_layers != 1 {
                return Err(CreateTextureError::InvalidDimension(
                    TextureDimensionError::MultisampledDepthOrArrayLayer(
                        desc.size.depth_or_array_layers,
                    ),
                ));
            }

            if desc.usage.contains(wgt::TextureUsages::STORAGE_BINDING) {
                return Err(CreateTextureError::InvalidMultisampledStorageBinding);
            }

            if !desc.usage.contains(wgt::TextureUsages::RENDER_ATTACHMENT) {
                return Err(CreateTextureError::MultisampledNotRenderAttachment);
            }

            // The format must support at least one multisample count at all.
            // (This `intersects(...)` expression continues past this span.)
            if !format_features.flags.intersects(
                wgt::TextureFormatFeatureFlags::MULTISAMPLE_X4
                    | wgt::TextureFormatFeatureFlags::MULTISAMPLE_X2
                    |
wgt::TextureFormatFeatureFlags::MULTISAMPLE_X8 + | wgt::TextureFormatFeatureFlags::MULTISAMPLE_X16, + ) { + return Err(CreateTextureError::InvalidMultisampledFormat(desc.format)); + } + + if !format_features + .flags + .sample_count_supported(desc.sample_count) + { + return Err(CreateTextureError::InvalidSampleCount( + desc.sample_count, + desc.format, + desc.format + .guaranteed_format_features(self.features) + .flags + .supported_sample_counts(), + adapter + .get_texture_format_features(desc.format) + .flags + .supported_sample_counts(), + )); + }; + } + + let mips = desc.mip_level_count; + let max_levels_allowed = desc.size.max_mips(desc.dimension).min(hal::MAX_MIP_LEVELS); + if mips == 0 || mips > max_levels_allowed { + return Err(CreateTextureError::InvalidMipLevelCount { + requested: mips, + maximum: max_levels_allowed, + }); + } + + let missing_allowed_usages = desc.usage - format_features.allowed_usages; + if !missing_allowed_usages.is_empty() { + // detect downlevel incompatibilities + let wgpu_allowed_usages = desc + .format + .guaranteed_format_features(self.features) + .allowed_usages; + let wgpu_missing_usages = desc.usage - wgpu_allowed_usages; + return Err(CreateTextureError::InvalidFormatUsages( + missing_allowed_usages, + desc.format, + wgpu_missing_usages.is_empty(), + )); + } + + let mut hal_view_formats = vec![]; + for format in desc.view_formats.iter() { + if desc.format == *format { + continue; + } + if desc.format.remove_srgb_suffix() != format.remove_srgb_suffix() { + return Err(CreateTextureError::InvalidViewFormat(*format, desc.format)); + } + hal_view_formats.push(*format); + } + if !hal_view_formats.is_empty() { + self.require_downlevel_flags(wgt::DownlevelFlags::VIEW_FORMATS)?; + } + + let hal_usage = conv::map_texture_usage_for_texture(desc, &format_features); + + let hal_desc = hal::TextureDescriptor { + label: desc.label.to_hal(self.instance_flags), + size: desc.size, + mip_level_count: desc.mip_level_count, + sample_count: 
desc.sample_count, + dimension: desc.dimension, + format: desc.format, + usage: hal_usage, + memory_flags: hal::MemoryFlags::empty(), + view_formats: hal_view_formats, + }; + + let raw_texture = unsafe { + self.raw + .as_ref() + .unwrap() + .create_texture(&hal_desc) + .map_err(DeviceError::from)? + }; + + let clear_mode = if hal_usage + .intersects(hal::TextureUses::DEPTH_STENCIL_WRITE | hal::TextureUses::COLOR_TARGET) + { + let (is_color, usage) = if desc.format.is_depth_stencil_format() { + (false, hal::TextureUses::DEPTH_STENCIL_WRITE) + } else { + (true, hal::TextureUses::COLOR_TARGET) + }; + let dimension = match desc.dimension { + wgt::TextureDimension::D1 => wgt::TextureViewDimension::D1, + wgt::TextureDimension::D2 => wgt::TextureViewDimension::D2, + wgt::TextureDimension::D3 => unreachable!(), + }; + + let clear_label = hal_label( + Some("(wgpu internal) clear texture view"), + self.instance_flags, + ); + + let mut clear_views = SmallVec::new(); + for mip_level in 0..desc.mip_level_count { + for array_layer in 0..desc.size.depth_or_array_layers { + macro_rules! 
push_clear_view { + ($format:expr, $aspect:expr) => { + let desc = hal::TextureViewDescriptor { + label: clear_label, + format: $format, + dimension, + usage, + range: wgt::ImageSubresourceRange { + aspect: $aspect, + base_mip_level: mip_level, + mip_level_count: Some(1), + base_array_layer: array_layer, + array_layer_count: Some(1), + }, + }; + clear_views.push(Some( + unsafe { self.raw().create_texture_view(&raw_texture, &desc) } + .map_err(DeviceError::from)?, + )); + }; + } + + if let Some(planes) = desc.format.planes() { + for plane in 0..planes { + let aspect = wgt::TextureAspect::from_plane(plane).unwrap(); + let format = desc.format.aspect_specific_format(aspect).unwrap(); + push_clear_view!(format, aspect); + } + } else { + push_clear_view!(desc.format, wgt::TextureAspect::All); + } + } + } + resource::TextureClearMode::RenderPass { + clear_views, + is_color, + } + } else { + resource::TextureClearMode::BufferCopy + }; + + let mut texture = + self.create_texture_from_hal(raw_texture, hal_usage, desc, format_features, clear_mode); + texture.hal_usage = hal_usage; + Ok(texture) + } + + pub(crate) fn create_texture_view( + self: &Arc<Self>, + texture: &Arc<Texture<A>>, + desc: &resource::TextureViewDescriptor, + ) -> Result<TextureView<A>, resource::CreateTextureViewError> { + let snatch_guard = texture.device.snatchable_lock.read(); + + let texture_raw = texture + .raw(&snatch_guard) + .ok_or(resource::CreateTextureViewError::InvalidTexture)?; + + // resolve TextureViewDescriptor defaults + // https://gpuweb.github.io/gpuweb/#abstract-opdef-resolving-gputextureviewdescriptor-defaults + let resolved_format = desc.format.unwrap_or_else(|| { + texture + .desc + .format + .aspect_specific_format(desc.range.aspect) + .unwrap_or(texture.desc.format) + }); + + let resolved_dimension = desc + .dimension + .unwrap_or_else(|| match texture.desc.dimension { + wgt::TextureDimension::D1 => wgt::TextureViewDimension::D1, + wgt::TextureDimension::D2 => { + if 
                    // Continuation of resolving the view-dimension default: a
                    // D2 texture defaults to a D2 view when it has a single
                    // layer, D2Array otherwise.
                    texture.desc.array_layer_count() == 1 {
                        wgt::TextureViewDimension::D2
                    } else {
                        wgt::TextureViewDimension::D2Array
                    }
                }
                wgt::TextureDimension::D3 => wgt::TextureViewDimension::D3,
            });

        // Default mip count: every level from `base_mip_level` to the end.
        let resolved_mip_level_count = desc.range.mip_level_count.unwrap_or_else(|| {
            texture
                .desc
                .mip_level_count
                .saturating_sub(desc.range.base_mip_level)
        });

        // Default layer count depends on the resolved view dimension: 1 for
        // non-array views, 6 for a cube, and "the rest of the layers" for
        // array / cube-array views.
        let resolved_array_layer_count =
            desc.range
                .array_layer_count
                .unwrap_or_else(|| match resolved_dimension {
                    wgt::TextureViewDimension::D1
                    | wgt::TextureViewDimension::D2
                    | wgt::TextureViewDimension::D3 => 1,
                    wgt::TextureViewDimension::Cube => 6,
                    wgt::TextureViewDimension::D2Array | wgt::TextureViewDimension::CubeArray => {
                        texture
                            .desc
                            .array_layer_count()
                            .saturating_sub(desc.range.base_array_layer)
                    }
                });

        // validate TextureViewDescriptor

        // The requested aspect must actually exist in the texture's format.
        let aspects = hal::FormatAspects::new(texture.desc.format, desc.range.aspect);
        if aspects.is_empty() {
            return Err(resource::CreateTextureViewError::InvalidAspect {
                texture_format: texture.desc.format,
                requested_aspect: desc.range.aspect,
            });
        }

        // Reinterpretation is only allowed to the texture's own format, one of
        // its declared `view_formats`, or (for a single aspect) that aspect's
        // specific format.
        let format_is_good = if desc.range.aspect == wgt::TextureAspect::All {
            resolved_format == texture.desc.format
                || texture.desc.view_formats.contains(&resolved_format)
        } else {
            Some(resolved_format)
                == texture
                    .desc
                    .format
                    .aspect_specific_format(desc.range.aspect)
        };
        if !format_is_good {
            return Err(resource::CreateTextureViewError::FormatReinterpretation {
                texture: texture.desc.format,
                view: resolved_format,
            });
        }

        // check if multisampled texture is seen as anything but 2D
        if texture.desc.sample_count > 1 && resolved_dimension != wgt::TextureViewDimension::D2 {
            return Err(
                resource::CreateTextureViewError::InvalidMultisampledTextureViewDimension(
                    resolved_dimension,
                ),
            );
        }

        // check if the dimension is compatible with the texture
        if texture.desc.dimension != resolved_dimension.compatible_texture_dimension() {
            return Err(
                resource::CreateTextureViewError::InvalidTextureViewDimension {
                    view: resolved_dimension,
                    texture: texture.desc.dimension,
                },
            );
        }

        // Layer-count rules per view dimension: exactly 1 for plain views,
        // exactly 6 for a cube, a multiple of 6 for a cube array.
        match resolved_dimension {
            TextureViewDimension::D1 | TextureViewDimension::D2 | TextureViewDimension::D3 => {
                if resolved_array_layer_count != 1 {
                    return Err(resource::CreateTextureViewError::InvalidArrayLayerCount {
                        requested: resolved_array_layer_count,
                        dim: resolved_dimension,
                    });
                }
            }
            TextureViewDimension::Cube => {
                if resolved_array_layer_count != 6 {
                    return Err(
                        resource::CreateTextureViewError::InvalidCubemapTextureDepth {
                            depth: resolved_array_layer_count,
                        },
                    );
                }
            }
            TextureViewDimension::CubeArray => {
                if resolved_array_layer_count % 6 != 0 {
                    return Err(
                        resource::CreateTextureViewError::InvalidCubemapArrayTextureDepth {
                            depth: resolved_array_layer_count,
                        },
                    );
                }
            }
            _ => {}
        }

        // Cube and cube-array views require a square texture.
        match resolved_dimension {
            TextureViewDimension::Cube | TextureViewDimension::CubeArray => {
                if texture.desc.size.width != texture.desc.size.height {
                    return Err(resource::CreateTextureViewError::InvalidCubeTextureViewSize);
                }
            }
            _ => {}
        }

        if resolved_mip_level_count == 0 {
            return Err(resource::CreateTextureViewError::ZeroMipLevelCount);
        }

        // The selected mip range must stay within the texture.
        // `saturating_add` avoids overflow on adversarial descriptor values.
        let mip_level_end = desc
            .range
            .base_mip_level
            .saturating_add(resolved_mip_level_count);

        let level_end = texture.desc.mip_level_count;
        if mip_level_end > level_end {
            return Err(resource::CreateTextureViewError::TooManyMipLevels {
                requested: mip_level_end,
                total: level_end,
            });
        }

        if resolved_array_layer_count == 0 {
            return Err(resource::CreateTextureViewError::ZeroArrayLayerCount);
        }

        // The selected layer range must stay within the texture.
        let array_layer_end = desc
            .range
            .base_array_layer
            .saturating_add(resolved_array_layer_count);

        let layer_end = texture.desc.array_layer_count();
        if array_layer_end > layer_end {
            return Err(resource::CreateTextureViewError::TooManyArrayLayers {
                requested: array_layer_end,
                // (Struct literal continues past this span with `layer_end`.)
                total:
layer_end, + }); + }; + + // https://gpuweb.github.io/gpuweb/#abstract-opdef-renderable-texture-view + let render_extent = 'b: loop { + if !texture + .desc + .usage + .contains(wgt::TextureUsages::RENDER_ATTACHMENT) + { + break 'b Err(TextureViewNotRenderableReason::Usage(texture.desc.usage)); + } + + if !(resolved_dimension == TextureViewDimension::D2 + || (self.features.contains(wgt::Features::MULTIVIEW) + && resolved_dimension == TextureViewDimension::D2Array)) + { + break 'b Err(TextureViewNotRenderableReason::Dimension( + resolved_dimension, + )); + } + + if resolved_mip_level_count != 1 { + break 'b Err(TextureViewNotRenderableReason::MipLevelCount( + resolved_mip_level_count, + )); + } + + if resolved_array_layer_count != 1 + && !(self.features.contains(wgt::Features::MULTIVIEW)) + { + break 'b Err(TextureViewNotRenderableReason::ArrayLayerCount( + resolved_array_layer_count, + )); + } + + if aspects != hal::FormatAspects::from(texture.desc.format) { + break 'b Err(TextureViewNotRenderableReason::Aspects(aspects)); + } + + break 'b Ok(texture + .desc + .compute_render_extent(desc.range.base_mip_level)); + }; + + // filter the usages based on the other criteria + let usage = { + let mask_copy = !(hal::TextureUses::COPY_SRC | hal::TextureUses::COPY_DST); + let mask_dimension = match resolved_dimension { + wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { + hal::TextureUses::RESOURCE + } + wgt::TextureViewDimension::D3 => { + hal::TextureUses::RESOURCE + | hal::TextureUses::STORAGE_READ + | hal::TextureUses::STORAGE_READ_WRITE + } + _ => hal::TextureUses::all(), + }; + let mask_mip_level = if resolved_mip_level_count == 1 { + hal::TextureUses::all() + } else { + hal::TextureUses::RESOURCE + }; + texture.hal_usage & mask_copy & mask_dimension & mask_mip_level + }; + + log::debug!( + "Create view for texture {:?} filters usages to {:?}", + texture.as_info().id(), + usage + ); + + // use the combined depth-stencil format for the view + 
let format = if resolved_format.is_depth_stencil_component(texture.desc.format) { + texture.desc.format + } else { + resolved_format + }; + + let resolved_range = wgt::ImageSubresourceRange { + aspect: desc.range.aspect, + base_mip_level: desc.range.base_mip_level, + mip_level_count: Some(resolved_mip_level_count), + base_array_layer: desc.range.base_array_layer, + array_layer_count: Some(resolved_array_layer_count), + }; + + let hal_desc = hal::TextureViewDescriptor { + label: desc.label.to_hal(self.instance_flags), + format, + dimension: resolved_dimension, + usage, + range: resolved_range, + }; + + let raw = unsafe { + self.raw + .as_ref() + .unwrap() + .create_texture_view(texture_raw, &hal_desc) + .map_err(|_| resource::CreateTextureViewError::OutOfMemory)? + }; + + let selector = TextureSelector { + mips: desc.range.base_mip_level..mip_level_end, + layers: desc.range.base_array_layer..array_layer_end, + }; + + Ok(TextureView { + raw: Snatchable::new(raw), + parent: texture.clone(), + device: self.clone(), + desc: resource::HalTextureViewDescriptor { + texture_format: texture.desc.format, + format: resolved_format, + dimension: resolved_dimension, + range: resolved_range, + }, + format_features: texture.format_features, + render_extent, + samples: texture.desc.sample_count, + selector, + info: ResourceInfo::new(desc.label.borrow_or_default()), + }) + } + + pub(crate) fn create_sampler( + self: &Arc<Self>, + desc: &resource::SamplerDescriptor, + ) -> Result<Sampler<A>, resource::CreateSamplerError> { + if desc + .address_modes + .iter() + .any(|am| am == &wgt::AddressMode::ClampToBorder) + { + self.require_features(wgt::Features::ADDRESS_MODE_CLAMP_TO_BORDER)?; + } + + if desc.border_color == Some(wgt::SamplerBorderColor::Zero) { + self.require_features(wgt::Features::ADDRESS_MODE_CLAMP_TO_ZERO)?; + } + + if desc.lod_min_clamp < 0.0 { + return Err(resource::CreateSamplerError::InvalidLodMinClamp( + desc.lod_min_clamp, + )); + } + if desc.lod_max_clamp < 
            // Continuation of `if desc.lod_max_clamp <` from the preceding
            // span: the LOD clamp range must be non-empty (max >= min).
            desc.lod_min_clamp {
            return Err(resource::CreateSamplerError::InvalidLodMaxClamp {
                lod_min_clamp: desc.lod_min_clamp,
                lod_max_clamp: desc.lod_max_clamp,
            });
        }

        // An anisotropy clamp of 0 (or any value below 1) is meaningless.
        if desc.anisotropy_clamp < 1 {
            return Err(resource::CreateSamplerError::InvalidAnisotropy(
                desc.anisotropy_clamp,
            ));
        }

        // Anisotropic filtering (clamp != 1) requires all three filters
        // (min, mag, mipmap) to be Linear.
        if desc.anisotropy_clamp != 1 {
            if !matches!(desc.min_filter, wgt::FilterMode::Linear) {
                return Err(
                    resource::CreateSamplerError::InvalidFilterModeWithAnisotropy {
                        filter_type: resource::SamplerFilterErrorType::MinFilter,
                        filter_mode: desc.min_filter,
                        anisotropic_clamp: desc.anisotropy_clamp,
                    },
                );
            }
            if !matches!(desc.mag_filter, wgt::FilterMode::Linear) {
                return Err(
                    resource::CreateSamplerError::InvalidFilterModeWithAnisotropy {
                        filter_type: resource::SamplerFilterErrorType::MagFilter,
                        filter_mode: desc.mag_filter,
                        anisotropic_clamp: desc.anisotropy_clamp,
                    },
                );
            }
            if !matches!(desc.mipmap_filter, wgt::FilterMode::Linear) {
                return Err(
                    resource::CreateSamplerError::InvalidFilterModeWithAnisotropy {
                        filter_type: resource::SamplerFilterErrorType::MipmapFilter,
                        filter_mode: desc.mipmap_filter,
                        anisotropic_clamp: desc.anisotropy_clamp,
                    },
                );
            }
        }

        let anisotropy_clamp = if self
            .downlevel
            .flags
            .contains(wgt::DownlevelFlags::ANISOTROPIC_FILTERING)
        {
            // Clamp anisotropy clamp to [1, 16] per the wgpu-hal interface
            desc.anisotropy_clamp.min(16)
        } else {
            // If it isn't supported, set this unconditionally to 1
            1
        };

        //TODO: check for wgt::DownlevelFlags::COMPARISON_SAMPLERS

        // Translate the validated descriptor into the HAL form.
        let hal_desc = hal::SamplerDescriptor {
            label: desc.label.to_hal(self.instance_flags),
            address_modes: desc.address_modes,
            mag_filter: desc.mag_filter,
            min_filter: desc.min_filter,
            mipmap_filter: desc.mipmap_filter,
            lod_clamp: desc.lod_min_clamp..desc.lod_max_clamp,
            compare: desc.compare,
            anisotropy_clamp,
            border_color: desc.border_color,
        };

        // (Expression continues past this span with
        // `.create_sampler(&hal_desc)` on the raw HAL device.)
        let raw = unsafe {
            self.raw
                .as_ref()
                .unwrap()
.create_sampler(&hal_desc) + .map_err(DeviceError::from)? + }; + Ok(Sampler { + raw: Some(raw), + device: self.clone(), + info: ResourceInfo::new(desc.label.borrow_or_default()), + comparison: desc.compare.is_some(), + filtering: desc.min_filter == wgt::FilterMode::Linear + || desc.mag_filter == wgt::FilterMode::Linear, + }) + } + + pub(crate) fn create_shader_module<'a>( + self: &Arc<Self>, + desc: &pipeline::ShaderModuleDescriptor<'a>, + source: pipeline::ShaderModuleSource<'a>, + ) -> Result<pipeline::ShaderModule<A>, pipeline::CreateShaderModuleError> { + let (module, source) = match source { + #[cfg(feature = "wgsl")] + pipeline::ShaderModuleSource::Wgsl(code) => { + profiling::scope!("naga::front::wgsl::parse_str"); + let module = naga::front::wgsl::parse_str(&code).map_err(|inner| { + pipeline::CreateShaderModuleError::Parsing(pipeline::ShaderError { + source: code.to_string(), + label: desc.label.as_ref().map(|l| l.to_string()), + inner: Box::new(inner), + }) + })?; + (Cow::Owned(module), code.into_owned()) + } + #[cfg(feature = "spirv")] + pipeline::ShaderModuleSource::SpirV(spv, options) => { + let parser = naga::front::spv::Frontend::new(spv.iter().cloned(), &options); + profiling::scope!("naga::front::spv::Frontend"); + let module = parser.parse().map_err(|inner| { + pipeline::CreateShaderModuleError::ParsingSpirV(pipeline::ShaderError { + source: String::new(), + label: desc.label.as_ref().map(|l| l.to_string()), + inner: Box::new(inner), + }) + })?; + (Cow::Owned(module), String::new()) + } + #[cfg(feature = "glsl")] + pipeline::ShaderModuleSource::Glsl(code, options) => { + let mut parser = naga::front::glsl::Frontend::default(); + profiling::scope!("naga::front::glsl::Frontend.parse"); + let module = parser.parse(&options, &code).map_err(|inner| { + pipeline::CreateShaderModuleError::ParsingGlsl(pipeline::ShaderError { + source: code.to_string(), + label: desc.label.as_ref().map(|l| l.to_string()), + inner: Box::new(inner), + }) + })?; + 
(Cow::Owned(module), code.into_owned()) + } + pipeline::ShaderModuleSource::Naga(module) => (module, String::new()), + pipeline::ShaderModuleSource::Dummy(_) => panic!("found `ShaderModuleSource::Dummy`"), + }; + for (_, var) in module.global_variables.iter() { + match var.binding { + Some(ref br) if br.group >= self.limits.max_bind_groups => { + return Err(pipeline::CreateShaderModuleError::InvalidGroupIndex { + bind: br.clone(), + group: br.group, + limit: self.limits.max_bind_groups, + }); + } + _ => continue, + }; + } + + use naga::valid::Capabilities as Caps; + profiling::scope!("naga::validate"); + + let mut caps = Caps::empty(); + caps.set( + Caps::PUSH_CONSTANT, + self.features.contains(wgt::Features::PUSH_CONSTANTS), + ); + caps.set( + Caps::FLOAT64, + self.features.contains(wgt::Features::SHADER_F64), + ); + caps.set( + Caps::PRIMITIVE_INDEX, + self.features + .contains(wgt::Features::SHADER_PRIMITIVE_INDEX), + ); + caps.set( + Caps::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, + self.features.contains( + wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, + ), + ); + caps.set( + Caps::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING, + self.features.contains( + wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING, + ), + ); + // TODO: This needs a proper wgpu feature + caps.set( + Caps::SAMPLER_NON_UNIFORM_INDEXING, + self.features.contains( + wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, + ), + ); + caps.set( + Caps::STORAGE_TEXTURE_16BIT_NORM_FORMATS, + self.features + .contains(wgt::Features::TEXTURE_FORMAT_16BIT_NORM), + ); + caps.set( + Caps::MULTIVIEW, + self.features.contains(wgt::Features::MULTIVIEW), + ); + caps.set( + Caps::EARLY_DEPTH_TEST, + self.features + .contains(wgt::Features::SHADER_EARLY_DEPTH_TEST), + ); + caps.set( + Caps::MULTISAMPLED_SHADING, + self.downlevel + .flags + 
.contains(wgt::DownlevelFlags::MULTISAMPLED_SHADING), + ); + caps.set( + Caps::DUAL_SOURCE_BLENDING, + self.features.contains(wgt::Features::DUAL_SOURCE_BLENDING), + ); + caps.set( + Caps::CUBE_ARRAY_TEXTURES, + self.downlevel + .flags + .contains(wgt::DownlevelFlags::CUBE_ARRAY_TEXTURES), + ); + + let debug_source = + if self.instance_flags.contains(wgt::InstanceFlags::DEBUG) && !source.is_empty() { + Some(hal::DebugSource { + file_name: Cow::Owned( + desc.label + .as_ref() + .map_or("shader".to_string(), |l| l.to_string()), + ), + source_code: Cow::Owned(source.clone()), + }) + } else { + None + }; + + let info = naga::valid::Validator::new(naga::valid::ValidationFlags::all(), caps) + .validate(&module) + .map_err(|inner| { + pipeline::CreateShaderModuleError::Validation(pipeline::ShaderError { + source, + label: desc.label.as_ref().map(|l| l.to_string()), + inner: Box::new(inner), + }) + })?; + + let interface = + validation::Interface::new(&module, &info, self.limits.clone(), self.features); + let hal_shader = hal::ShaderInput::Naga(hal::NagaShader { + module, + info, + debug_source, + }); + let hal_desc = hal::ShaderModuleDescriptor { + label: desc.label.to_hal(self.instance_flags), + runtime_checks: desc.shader_bound_checks.runtime_checks(), + }; + let raw = match unsafe { + self.raw + .as_ref() + .unwrap() + .create_shader_module(&hal_desc, hal_shader) + } { + Ok(raw) => raw, + Err(error) => { + return Err(match error { + hal::ShaderError::Device(error) => { + pipeline::CreateShaderModuleError::Device(error.into()) + } + hal::ShaderError::Compilation(ref msg) => { + log::error!("Shader error: {}", msg); + pipeline::CreateShaderModuleError::Generation + } + }) + } + }; + + Ok(pipeline::ShaderModule { + raw: Some(raw), + device: self.clone(), + interface: Some(interface), + info: ResourceInfo::new(desc.label.borrow_or_default()), + label: desc.label.borrow_or_default().to_string(), + }) + } + + #[allow(unused_unsafe)] + pub(crate) unsafe fn 
create_shader_module_spirv<'a>( + self: &Arc<Self>, + desc: &pipeline::ShaderModuleDescriptor<'a>, + source: &'a [u32], + ) -> Result<pipeline::ShaderModule<A>, pipeline::CreateShaderModuleError> { + self.require_features(wgt::Features::SPIRV_SHADER_PASSTHROUGH)?; + let hal_desc = hal::ShaderModuleDescriptor { + label: desc.label.to_hal(self.instance_flags), + runtime_checks: desc.shader_bound_checks.runtime_checks(), + }; + let hal_shader = hal::ShaderInput::SpirV(source); + let raw = match unsafe { + self.raw + .as_ref() + .unwrap() + .create_shader_module(&hal_desc, hal_shader) + } { + Ok(raw) => raw, + Err(error) => { + return Err(match error { + hal::ShaderError::Device(error) => { + pipeline::CreateShaderModuleError::Device(error.into()) + } + hal::ShaderError::Compilation(ref msg) => { + log::error!("Shader error: {}", msg); + pipeline::CreateShaderModuleError::Generation + } + }) + } + }; + + Ok(pipeline::ShaderModule { + raw: Some(raw), + device: self.clone(), + interface: None, + info: ResourceInfo::new(desc.label.borrow_or_default()), + label: desc.label.borrow_or_default().to_string(), + }) + } + + /// Generate information about late-validated buffer bindings for pipelines. + //TODO: should this be combined with `get_introspection_bind_group_layouts` in some way? + pub(crate) fn make_late_sized_buffer_groups( + shader_binding_sizes: &FastHashMap<naga::ResourceBinding, wgt::BufferSize>, + layout: &binding_model::PipelineLayout<A>, + ) -> ArrayVec<pipeline::LateSizedBufferGroup, { hal::MAX_BIND_GROUPS }> { + // Given the shader-required binding sizes and the pipeline layout, + // return the filtered list of them in the layout order, + // removing those with given `min_binding_size`. + layout + .bind_group_layouts + .iter() + .enumerate() + .map(|(group_index, bgl)| pipeline::LateSizedBufferGroup { + shader_sizes: bgl + .entries + .values() + .filter_map(|entry| match entry.ty { + wgt::BindingType::Buffer { + min_binding_size: None, + .. 
+ } => { + let rb = naga::ResourceBinding { + group: group_index as u32, + binding: entry.binding, + }; + let shader_size = + shader_binding_sizes.get(&rb).map_or(0, |nz| nz.get()); + Some(shader_size) + } + _ => None, + }) + .collect(), + }) + .collect() + } + + pub(crate) fn create_bind_group_layout( + self: &Arc<Self>, + label: &crate::Label, + entry_map: bgl::EntryMap, + origin: bgl::Origin, + ) -> Result<BindGroupLayout<A>, binding_model::CreateBindGroupLayoutError> { + #[derive(PartialEq)] + enum WritableStorage { + Yes, + No, + } + + for entry in entry_map.values() { + use wgt::BindingType as Bt; + + let mut required_features = wgt::Features::empty(); + let mut required_downlevel_flags = wgt::DownlevelFlags::empty(); + let (array_feature, writable_storage) = match entry.ty { + Bt::Buffer { + ty: wgt::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: _, + } => ( + Some(wgt::Features::BUFFER_BINDING_ARRAY), + WritableStorage::No, + ), + Bt::Buffer { + ty: wgt::BufferBindingType::Uniform, + has_dynamic_offset: true, + min_binding_size: _, + } => ( + Some(wgt::Features::BUFFER_BINDING_ARRAY), + WritableStorage::No, + ), + Bt::Buffer { + ty: wgt::BufferBindingType::Storage { read_only }, + .. + } => ( + Some( + wgt::Features::BUFFER_BINDING_ARRAY + | wgt::Features::STORAGE_RESOURCE_BINDING_ARRAY, + ), + match read_only { + true => WritableStorage::No, + false => WritableStorage::Yes, + }, + ), + Bt::Sampler { .. } => ( + Some(wgt::Features::TEXTURE_BINDING_ARRAY), + WritableStorage::No, + ), + Bt::Texture { + multisampled: true, + sample_type: TextureSampleType::Float { filterable: true }, + .. + } => { + return Err(binding_model::CreateBindGroupLayoutError::Entry { + binding: entry.binding, + error: + BindGroupLayoutEntryError::SampleTypeFloatFilterableBindingMultisampled, + }); + } + Bt::Texture { .. 
            } => (
                Some(wgt::Features::TEXTURE_BINDING_ARRAY),
                WritableStorage::No,
            ),
            Bt::StorageTexture {
                access,
                view_dimension,
                format: _,
            } => {
                // Cube-shaped storage texture views are not allowed.
                match view_dimension {
                    wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => {
                        return Err(binding_model::CreateBindGroupLayoutError::Entry {
                            binding: entry.binding,
                            error: BindGroupLayoutEntryError::StorageTextureCube,
                        })
                    }
                    _ => (),
                }
                // ReadOnly/ReadWrite storage access requires the
                // adapter-specific format-features opt-in.
                match access {
                    wgt::StorageTextureAccess::ReadOnly
                    | wgt::StorageTextureAccess::ReadWrite
                        if !self.features.contains(
                            wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES,
                        ) =>
                    {
                        return Err(binding_model::CreateBindGroupLayoutError::Entry {
                            binding: entry.binding,
                            error: BindGroupLayoutEntryError::StorageTextureReadWrite,
                        });
                    }
                    _ => (),
                }
                (
                    Some(
                        wgt::Features::TEXTURE_BINDING_ARRAY
                            | wgt::Features::STORAGE_RESOURCE_BINDING_ARRAY,
                    ),
                    match access {
                        wgt::StorageTextureAccess::WriteOnly => WritableStorage::Yes,
                        wgt::StorageTextureAccess::ReadOnly => {
                            required_features |=
                                wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES;
                            WritableStorage::No
                        }
                        wgt::StorageTextureAccess::ReadWrite => {
                            required_features |=
                                wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES;
                            WritableStorage::Yes
                        }
                    },
                )
            }
            // NOTE(review): acceleration structures are unimplemented here;
            // this panics if reached.
            Bt::AccelerationStructure => todo!(),
        };

        // Validate the count parameter
        if entry.count.is_some() {
            // Binding arrays need the per-type array feature determined above.
            required_features |= array_feature
                .ok_or(BindGroupLayoutEntryError::ArrayUnsupported)
                .map_err(|error| binding_model::CreateBindGroupLayoutError::Entry {
                    binding: entry.binding,
                    error,
                })?;
        }

        if entry.visibility.contains_invalid_bits() {
            return Err(
                binding_model::CreateBindGroupLayoutError::InvalidVisibility(entry.visibility),
            );
        }

        if entry.visibility.contains(wgt::ShaderStages::VERTEX) {
            // Writable storage from the vertex stage is an optional feature.
            if writable_storage == WritableStorage::Yes {
                required_features |= wgt::Features::VERTEX_WRITABLE_STORAGE;
            }
            if let Bt::Buffer {
                ty:
                    wgt::BufferBindingType::Storage { .. },
                ..
            } = entry.ty
            {
                required_downlevel_flags |= wgt::DownlevelFlags::VERTEX_STORAGE;
            }
        }
        if writable_storage == WritableStorage::Yes
            && entry.visibility.contains(wgt::ShaderStages::FRAGMENT)
        {
            required_downlevel_flags |= wgt::DownlevelFlags::FRAGMENT_WRITABLE_STORAGE;
        }

        self.require_features(required_features)
            .map_err(BindGroupLayoutEntryError::MissingFeatures)
            .map_err(|error| binding_model::CreateBindGroupLayoutError::Entry {
                binding: entry.binding,
                error,
            })?;
        self.require_downlevel_flags(required_downlevel_flags)
            .map_err(BindGroupLayoutEntryError::MissingDownlevelFlags)
            .map_err(|error| binding_model::CreateBindGroupLayoutError::Entry {
                binding: entry.binding,
                error,
            })?;
    }

    let bgl_flags = conv::bind_group_layout_flags(self.features);

    let hal_bindings = entry_map.values().copied().collect::<Vec<_>>();
    let label = label.to_hal(self.instance_flags);
    let hal_desc = hal::BindGroupLayoutDescriptor {
        label,
        flags: bgl_flags,
        entries: &hal_bindings,
    };
    let raw = unsafe {
        self.raw
            .as_ref()
            .unwrap()
            .create_bind_group_layout(&hal_desc)
            .map_err(DeviceError::from)?
    };

    let mut count_validator = binding_model::BindingTypeMaxCountValidator::default();
    for entry in entry_map.values() {
        count_validator.add_binding(entry);
    }
    // If a single bind group layout violates limits, the pipeline layout is
    // definitely going to violate limits too, let's catch it now.
    count_validator
        .validate(&self.limits)
        .map_err(binding_model::CreateBindGroupLayoutError::TooManyBindings)?;

    Ok(BindGroupLayout {
        raw: Some(raw),
        device: self.clone(),
        entries: entry_map,
        origin,
        binding_count_validator: count_validator,
        info: ResourceInfo::new(label.unwrap_or("<BindGroupLayout>")),
        label: label.unwrap_or_default().to_string(),
    })
}

/// Validate one buffer binding for a bind group and produce its hal binding.
///
/// Checks the binding type against the layout declaration, offset alignment,
/// binding range and size limits; records usage-tracker state, memory-init
/// actions, dynamic-offset info, and late-validated sizes as side effects via
/// the `&mut` out-parameters.
pub(crate) fn create_buffer_binding<'a>(
    bb: &binding_model::BufferBinding,
    binding: u32,
    decl: &wgt::BindGroupLayoutEntry,
    used_buffer_ranges: &mut Vec<BufferInitTrackerAction<A>>,
    dynamic_binding_info: &mut Vec<binding_model::BindGroupDynamicBindingData>,
    late_buffer_binding_sizes: &mut FastHashMap<u32, wgt::BufferSize>,
    used: &mut BindGroupStates<A>,
    storage: &'a Storage<Buffer<A>>,
    limits: &wgt::Limits,
    snatch_guard: &'a SnatchGuard<'a>,
) -> Result<hal::BufferBinding<'a, A>, binding_model::CreateBindGroupError> {
    use crate::binding_model::CreateBindGroupError as Error;

    // The layout entry must declare a buffer binding of some kind.
    let (binding_ty, dynamic, min_size) = match decl.ty {
        wgt::BindingType::Buffer {
            ty,
            has_dynamic_offset,
            min_binding_size,
        } => (ty, has_dynamic_offset, min_binding_size),
        _ => {
            return Err(Error::WrongBindingType {
                binding,
                actual: decl.ty,
                expected: "UniformBuffer, StorageBuffer or ReadonlyStorageBuffer",
            })
        }
    };
    // Map the binding type to required buffer usage, internal tracker use,
    // and the applicable device binding-size limit.
    let (pub_usage, internal_use, range_limit) = match binding_ty {
        wgt::BufferBindingType::Uniform => (
            wgt::BufferUsages::UNIFORM,
            hal::BufferUses::UNIFORM,
            limits.max_uniform_buffer_binding_size,
        ),
        wgt::BufferBindingType::Storage { read_only } => (
            wgt::BufferUsages::STORAGE,
            if read_only {
                hal::BufferUses::STORAGE_READ
            } else {
                hal::BufferUses::STORAGE_READ_WRITE
            },
            limits.max_storage_buffer_binding_size,
        ),
    };

    let (align, align_limit_name) =
        binding_model::buffer_binding_type_alignment(limits, binding_ty);
    if bb.offset % align as u64 != 0 {
        return Err(Error::UnalignedBufferOffset(
            bb.offset,
            align_limit_name,
            align,
        ));
    }

    let buffer = used
        .buffers
        .add_single(storage, bb.buffer_id, internal_use)
        .ok_or(Error::InvalidBuffer(bb.buffer_id))?;

    check_buffer_usage(buffer.usage, pub_usage)?;
    // The raw buffer may already have been snatched (destroyed).
    let raw_buffer = buffer
        .raw
        .get(snatch_guard)
        .ok_or(Error::InvalidBuffer(bb.buffer_id))?;

    // Resolve the bound size: explicit, or "rest of the buffer".
    let (bind_size, bind_end) = match bb.size {
        Some(size) => {
            let end = bb.offset + size.get();
            if end > buffer.size {
                return Err(Error::BindingRangeTooLarge {
                    buffer: bb.buffer_id,
                    range: bb.offset..end,
                    size: buffer.size,
                });
            }
            (size.get(), end)
        }
        None => {
            if buffer.size < bb.offset {
                return Err(Error::BindingRangeTooLarge {
                    buffer: bb.buffer_id,
                    range: bb.offset..bb.offset,
                    size: buffer.size,
                });
            }
            (buffer.size - bb.offset, buffer.size)
        }
    };

    if bind_size > range_limit as u64 {
        return Err(Error::BufferRangeTooLarge {
            binding,
            given: bind_size as u32,
            limit: range_limit,
        });
    }

    // Record binding info for validating dynamic offsets
    if dynamic {
        dynamic_binding_info.push(binding_model::BindGroupDynamicBindingData {
            binding_idx: binding,
            buffer_size: buffer.size,
            binding_range: bb.offset..bind_end,
            maximum_dynamic_offset: buffer.size - bind_end,
            binding_type: binding_ty,
        });
    }

    if let Some(non_zero) = min_size {
        let min_size = non_zero.get();
        if min_size > bind_size {
            return Err(Error::BindingSizeTooSmall {
                buffer: bb.buffer_id,
                actual: bind_size,
                min: min_size,
            });
        }
    } else {
        // No declared min size: size must be validated late, at pipeline
        // bind time, against the shader's requirement. Zero is never valid.
        let late_size =
            wgt::BufferSize::new(bind_size).ok_or(Error::BindingZeroSize(bb.buffer_id))?;
        late_buffer_binding_sizes.insert(binding, late_size);
    }

    // assumes the binding alignment checked above is a multiple of
    // COPY_BUFFER_ALIGNMENT, so this cannot fire — TODO confirm
    assert_eq!(bb.offset % wgt::COPY_BUFFER_ALIGNMENT, 0);
    used_buffer_ranges.extend(buffer.initialization_status.read().create_action(
        buffer,
        bb.offset..bb.offset + bind_size,
        MemoryInitKind::NeedsInitializedMemory,
    ));

    Ok(hal::BufferBinding {
        buffer: raw_buffer,
        offset: bb.offset,
        size: bb.size,
    })
}

/// Validate one texture-view binding for a bind group.
///
/// Records the view's selector in the usage tracker, checks that the view
/// and bind group belong to the same device, verifies texture usage, and
/// queues a memory-initialization action for the bound subresource range.
pub(crate) fn create_texture_binding(
    view: &TextureView<A>,
    internal_use: hal::TextureUses,
    pub_usage: wgt::TextureUsages,
    used: &mut BindGroupStates<A>,
    used_texture_ranges: &mut Vec<TextureInitTrackerAction<A>>,
) -> Result<(), binding_model::CreateBindGroupError> {
    let texture = &view.parent;
    let texture_id = texture.as_info().id();
    // Careful here: the texture may no longer have its own ref count,
    // if it was deleted by the user.
    let texture = used
        .textures
        .add_single(texture, Some(view.selector.clone()), internal_use)
        .ok_or(binding_model::CreateBindGroupError::InvalidTexture(
            texture_id,
        ))?;

    if texture.device.as_info().id() != view.device.as_info().id() {
        return Err(DeviceError::WrongDevice.into());
    }

    check_texture_usage(texture.desc.usage, pub_usage)?;

    used_texture_ranges.push(TextureInitTrackerAction {
        texture: texture.clone(),
        range: TextureInitRange {
            mip_range: view.desc.range.mip_range(texture.desc.mip_level_count),
            layer_range: view
                .desc
                .range
                .layer_range(texture.desc.array_layer_count()),
        },
        kind: MemoryInitKind::NeedsInitializedMemory,
    });

    Ok(())
}

// This function expects the provided bind group layout to be resolved
// (not passing a duplicate) beforehand.
/// Create a bind group from a descriptor against a resolved layout.
pub(crate) fn create_bind_group(
    self: &Arc<Self>,
    layout: &Arc<BindGroupLayout<A>>,
    desc: &binding_model::BindGroupDescriptor,
    hub: &Hub<A>,
) -> Result<BindGroup<A>, binding_model::CreateBindGroupError> {
    use crate::binding_model::{BindingResource as Br, CreateBindGroupError as Error};
    {
        // Check that the number of entries in the descriptor matches
        // the number of entries in the layout.
        let actual = desc.entries.len();
        let expected = layout.entries.len();
        if actual != expected {
            return Err(Error::BindingsNumMismatch { expected, actual });
        }
    }

    // TODO: arrayvec/smallvec, or re-use allocations
    // Record binding info for dynamic offset validation
    let mut dynamic_binding_info = Vec::new();
    // Map of binding -> shader reflected size
    //Note: we can't collect into a vector right away because
    // it needs to be in BGL iteration order, not BG entry order.
    let mut late_buffer_binding_sizes = FastHashMap::default();
    // fill out the descriptors
    let mut used = BindGroupStates::new();

    let buffer_guard = hub.buffers.read();
    let texture_view_guard = hub.texture_views.read();
    let sampler_guard = hub.samplers.read();

    let mut used_buffer_ranges = Vec::new();
    let mut used_texture_ranges = Vec::new();
    let mut hal_entries = Vec::with_capacity(desc.entries.len());
    let mut hal_buffers = Vec::new();
    let mut hal_samplers = Vec::new();
    let mut hal_textures = Vec::new();
    // Hold the snatch lock so raw buffers/views can't be destroyed while
    // we collect them.
    let snatch_guard = self.snatchable_lock.read();
    for entry in desc.entries.iter() {
        let binding = entry.binding;
        // Find the corresponding declaration in the layout
        let decl = layout
            .entries
            .get(binding)
            .ok_or(Error::MissingBindingDeclaration(binding))?;
        // For each resource, push the hal binding(s) into the matching
        // flat array and remember (start index, count) for the hal entry.
        let (res_index, count) = match entry.resource {
            Br::Buffer(ref bb) => {
                let bb = Self::create_buffer_binding(
                    bb,
                    binding,
                    decl,
                    &mut used_buffer_ranges,
                    &mut dynamic_binding_info,
                    &mut late_buffer_binding_sizes,
                    &mut used,
                    &*buffer_guard,
                    &self.limits,
                    &snatch_guard,
                )?;

                let res_index = hal_buffers.len();
                hal_buffers.push(bb);
                (res_index, 1)
            }
            Br::BufferArray(ref bindings_array) => {
                let num_bindings = bindings_array.len();
                Self::check_array_binding(self.features, decl.count, num_bindings)?;

                let res_index = hal_buffers.len();
                for bb in bindings_array.iter() {
                    let bb = Self::create_buffer_binding(
                        bb,
                        binding,
                        decl,
                        &mut used_buffer_ranges,
                        &mut dynamic_binding_info,
                        &mut late_buffer_binding_sizes,
                        &mut used,
                        &*buffer_guard,
                        &self.limits,
                        &snatch_guard,
                    )?;
                    hal_buffers.push(bb);
                }
                (res_index, num_bindings)
            }
            Br::Sampler(id) => {
                match decl.ty {
                    wgt::BindingType::Sampler(ty) => {
                        let sampler = used
                            .samplers
                            .add_single(&*sampler_guard, id)
                            .ok_or(Error::InvalidSampler(id))?;

                        if sampler.device.as_info().id() != self.as_info().id() {
                            return Err(DeviceError::WrongDevice.into());
                        }

                        // Allowed sampler values for filtering and comparison
                        let (allowed_filtering, allowed_comparison) = match ty {
                            wgt::SamplerBindingType::Filtering => (None, false),
                            wgt::SamplerBindingType::NonFiltering => (Some(false), false),
                            wgt::SamplerBindingType::Comparison => (None, true),
                        };

                        // `None` means "any filtering mode is acceptable".
                        if let Some(allowed_filtering) = allowed_filtering {
                            if allowed_filtering != sampler.filtering {
                                return Err(Error::WrongSamplerFiltering {
                                    binding,
                                    layout_flt: allowed_filtering,
                                    sampler_flt: sampler.filtering,
                                });
                            }
                        }

                        if allowed_comparison != sampler.comparison {
                            return Err(Error::WrongSamplerComparison {
                                binding,
                                layout_cmp: allowed_comparison,
                                sampler_cmp: sampler.comparison,
                            });
                        }

                        let res_index = hal_samplers.len();
                        hal_samplers.push(sampler.raw());
                        (res_index, 1)
                    }
                    _ => {
                        return Err(Error::WrongBindingType {
                            binding,
                            actual: decl.ty,
                            expected: "Sampler",
                        })
                    }
                }
            }
            Br::SamplerArray(ref bindings_array) => {
                let num_bindings = bindings_array.len();
                Self::check_array_binding(self.features, decl.count, num_bindings)?;

                let res_index = hal_samplers.len();
                for &id in bindings_array.iter() {
                    let sampler = used
                        .samplers
                        .add_single(&*sampler_guard, id)
                        .ok_or(Error::InvalidSampler(id))?;
                    if sampler.device.as_info().id() != self.as_info().id() {
                        return Err(DeviceError::WrongDevice.into());
                    }
                    hal_samplers.push(sampler.raw());
                }

                (res_index, num_bindings)
            }
            Br::TextureView(id) => {
                let view = used
                    .views
                    .add_single(&*texture_view_guard, id)
                    .ok_or(Error::InvalidTextureView(id))?;
                let (pub_usage, internal_use) = self.texture_use_parameters(
                    binding,
                    decl,
                    view,
                    "SampledTexture, ReadonlyStorageTexture or WriteonlyStorageTexture",
                )?;
                Self::create_texture_binding(
                    view,
                    internal_use,
                    pub_usage,
                    &mut used,
                    &mut used_texture_ranges,
                )?;
                let res_index = hal_textures.len();
                hal_textures.push(hal::TextureBinding {
                    view: view
                        .raw(&snatch_guard)
                        .ok_or(Error::InvalidTextureView(id))?,
                    usage: internal_use,
                });
                (res_index, 1)
            }
            Br::TextureViewArray(ref bindings_array) => {
                let num_bindings = bindings_array.len();
                Self::check_array_binding(self.features, decl.count, num_bindings)?;

                let res_index = hal_textures.len();
                for &id in bindings_array.iter() {
                    let view = used
                        .views
                        .add_single(&*texture_view_guard, id)
                        .ok_or(Error::InvalidTextureView(id))?;
                    let (pub_usage, internal_use) =
                        self.texture_use_parameters(binding, decl, view,
                        "SampledTextureArray, ReadonlyStorageTextureArray or WriteonlyStorageTextureArray")?;
                    Self::create_texture_binding(
                        view,
                        internal_use,
                        pub_usage,
                        &mut used,
                        &mut used_texture_ranges,
                    )?;
                    hal_textures.push(hal::TextureBinding {
                        view: view
                            .raw(&snatch_guard)
                            .ok_or(Error::InvalidTextureView(id))?,
                        usage: internal_use,
                    });
                }

                (res_index, num_bindings)
            }
        };

        hal_entries.push(hal::BindGroupEntry {
            binding,
            resource_index: res_index as u32,
            count: count as u32,
        });
    }

    used.optimize();

    // After sorting, duplicate bindings are adjacent, so one pairwise
    // scan detects them.
    hal_entries.sort_by_key(|entry| entry.binding);
    for (a, b) in hal_entries.iter().zip(hal_entries.iter().skip(1)) {
        if a.binding == b.binding {
            return Err(Error::DuplicateBinding(a.binding));
        }
    }
    let hal_desc = hal::BindGroupDescriptor {
        label: desc.label.to_hal(self.instance_flags),
        layout: layout.raw(),
        entries: &hal_entries,
        buffers: &hal_buffers,
        samplers: &hal_samplers,
        textures:
            &hal_textures,
        acceleration_structures: &[],
    };
    let raw = unsafe {
        self.raw
            .as_ref()
            .unwrap()
            .create_bind_group(&hal_desc)
            .map_err(DeviceError::from)?
    };

    Ok(BindGroup {
        raw: Snatchable::new(raw),
        device: self.clone(),
        layout: layout.clone(),
        info: ResourceInfo::new(desc.label.borrow_or_default()),
        used,
        used_buffer_ranges,
        used_texture_ranges,
        dynamic_binding_info,
        // collect in the order of BGL iteration
        late_buffer_binding_sizes: layout
            .entries
            .indices()
            .flat_map(|binding| late_buffer_binding_sizes.get(&binding).cloned())
            .collect(),
    })
}

/// Validate the length of a binding array against the layout's `count`.
///
/// Errors if the layout declared no `count`, if the array is longer than
/// declared, if it is shorter without `PARTIALLY_BOUND_BINDING_ARRAY`,
/// or if it is empty.
pub(crate) fn check_array_binding(
    features: wgt::Features,
    count: Option<NonZeroU32>,
    num_bindings: usize,
) -> Result<(), super::binding_model::CreateBindGroupError> {
    use super::binding_model::CreateBindGroupError as Error;

    if let Some(count) = count {
        let count = count.get() as usize;
        if count < num_bindings {
            return Err(Error::BindingArrayPartialLengthMismatch {
                actual: num_bindings,
                expected: count,
            });
        }
        if count != num_bindings
            && !features.contains(wgt::Features::PARTIALLY_BOUND_BINDING_ARRAY)
        {
            return Err(Error::BindingArrayLengthMismatch {
                actual: num_bindings,
                expected: count,
            });
        }
        if num_bindings == 0 {
            return Err(Error::BindingArrayZeroLength);
        }
    } else {
        // An array resource was supplied for a non-array layout entry.
        return Err(Error::SingleBindingExpected);
    };

    Ok(())
}

/// Determine the public usage and internal hal use for a texture-view
/// binding, validating the view against the layout declaration
/// (sample type, dimension, multisampling, storage format/access).
pub(crate) fn texture_use_parameters(
    self: &Arc<Self>,
    binding: u32,
    decl: &wgt::BindGroupLayoutEntry,
    view: &TextureView<A>,
    expected: &'static str,
) -> Result<(wgt::TextureUsages, hal::TextureUses), binding_model::CreateBindGroupError> {
    use crate::binding_model::CreateBindGroupError as Error;
    // A view covering both depth and stencil aspects cannot be bound.
    if view
        .desc
        .aspects()
        .contains(hal::FormatAspects::DEPTH | hal::FormatAspects::STENCIL)
    {
        return Err(Error::DepthStencilAspect);
    }
    match decl.ty {
        wgt::BindingType::Texture {
            sample_type,
            view_dimension,
            multisampled,
        } => {
            use
                wgt::TextureSampleType as Tst;
            if multisampled != (view.samples != 1) {
                return Err(Error::InvalidTextureMultisample {
                    binding,
                    layout_multisampled: multisampled,
                    view_samples: view.samples,
                });
            }
            let compat_sample_type = view
                .desc
                .format
                .sample_type(Some(view.desc.range.aspect), Some(self.features))
                .unwrap();
            match (sample_type, compat_sample_type) {
                (Tst::Uint, Tst::Uint) |
                (Tst::Sint, Tst::Sint) |
                (Tst::Depth, Tst::Depth) |
                // if we expect non-filterable, accept anything float
                (Tst::Float { filterable: false }, Tst::Float { .. }) |
                // if we expect filterable, require it
                (Tst::Float { filterable: true }, Tst::Float { filterable: true }) |
                // if we expect non-filterable, also accept depth
                (Tst::Float { filterable: false }, Tst::Depth) => {}
                // if we expect filterable, also accept Float that is defined as
                // unfilterable if filterable feature is explicitly enabled (only hit
                // if wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES is
                // enabled)
                (Tst::Float { filterable: true }, Tst::Float { .. }) if view.format_features.flags.contains(wgt::TextureFormatFeatureFlags::FILTERABLE) => {}
                _ => {
                    return Err(Error::InvalidTextureSampleType {
                        binding,
                        layout_sample_type: sample_type,
                        view_format: view.desc.format,
                    })
                }
            }
            if view_dimension != view.desc.dimension {
                return Err(Error::InvalidTextureDimension {
                    binding,
                    layout_dimension: view_dimension,
                    view_dimension: view.desc.dimension,
                });
            }
            Ok((
                wgt::TextureUsages::TEXTURE_BINDING,
                hal::TextureUses::RESOURCE,
            ))
        }
        wgt::BindingType::StorageTexture {
            access,
            format,
            view_dimension,
        } => {
            if format != view.desc.format {
                return Err(Error::InvalidStorageTextureFormat {
                    binding,
                    layout_format: format,
                    view_format: view.desc.format,
                });
            }
            if view_dimension != view.desc.dimension {
                return Err(Error::InvalidTextureDimension {
                    binding,
                    layout_dimension: view_dimension,
                    view_dimension: view.desc.dimension,
                });
            }

            // Storage texture views must select exactly one mip level.
            let mip_level_count = view.selector.mips.end - view.selector.mips.start;
            if mip_level_count != 1 {
                return Err(Error::InvalidStorageTextureMipLevelCount {
                    binding,
                    mip_level_count,
                });
            }

            let internal_use = match access {
                // NOTE(review): write-only access is tracked as
                // STORAGE_READ_WRITE at the hal level here.
                wgt::StorageTextureAccess::WriteOnly => hal::TextureUses::STORAGE_READ_WRITE,
                wgt::StorageTextureAccess::ReadOnly => {
                    if !view
                        .format_features
                        .flags
                        .contains(wgt::TextureFormatFeatureFlags::STORAGE_READ_WRITE)
                    {
                        return Err(Error::StorageReadNotSupported(view.desc.format));
                    }
                    hal::TextureUses::STORAGE_READ
                }
                wgt::StorageTextureAccess::ReadWrite => {
                    if !view
                        .format_features
                        .flags
                        .contains(wgt::TextureFormatFeatureFlags::STORAGE_READ_WRITE)
                    {
                        return Err(Error::StorageReadNotSupported(view.desc.format));
                    }

                    hal::TextureUses::STORAGE_READ_WRITE
                }
            };
            Ok((wgt::TextureUsages::STORAGE_BINDING, internal_use))
        }
        _ => Err(Error::WrongBindingType {
            binding,
            actual: decl.ty,
            expected,
        }),
    }
}

/// Create a pipeline layout from bind group layout ids and push-constant
/// ranges, validating group counts, push-constant coverage/alignment,
/// device ownership, and aggregate binding limits.
pub(crate) fn create_pipeline_layout(
    self:
        &Arc<Self>,
    desc: &binding_model::PipelineLayoutDescriptor,
    bgl_registry: &Registry<BindGroupLayout<A>>,
) -> Result<binding_model::PipelineLayout<A>, binding_model::CreatePipelineLayoutError> {
    use crate::binding_model::CreatePipelineLayoutError as Error;

    let bind_group_layouts_count = desc.bind_group_layouts.len();
    let device_max_bind_groups = self.limits.max_bind_groups as usize;
    if bind_group_layouts_count > device_max_bind_groups {
        return Err(Error::TooManyGroups {
            actual: bind_group_layouts_count,
            max: device_max_bind_groups,
        });
    }

    if !desc.push_constant_ranges.is_empty() {
        self.require_features(wgt::Features::PUSH_CONSTANTS)?;
    }

    // Each shader stage may be covered by at most one push-constant range.
    let mut used_stages = wgt::ShaderStages::empty();
    for (index, pc) in desc.push_constant_ranges.iter().enumerate() {
        if pc.stages.intersects(used_stages) {
            return Err(Error::MoreThanOnePushConstantRangePerStage {
                index,
                provided: pc.stages,
                intersected: pc.stages & used_stages,
            });
        }
        used_stages |= pc.stages;

        let device_max_pc_size = self.limits.max_push_constant_size;
        if device_max_pc_size < pc.range.end {
            return Err(Error::PushConstantRangeTooLarge {
                index,
                range: pc.range.clone(),
                max: device_max_pc_size,
            });
        }

        // Both ends of the range must be aligned.
        if pc.range.start % wgt::PUSH_CONSTANT_ALIGNMENT != 0 {
            return Err(Error::MisalignedPushConstantRange {
                index,
                bound: pc.range.start,
            });
        }
        if pc.range.end % wgt::PUSH_CONSTANT_ALIGNMENT != 0 {
            return Err(Error::MisalignedPushConstantRange {
                index,
                bound: pc.range.end,
            });
        }
    }

    let mut count_validator = binding_model::BindingTypeMaxCountValidator::default();

    // Collect references to the BGLs
    let mut bind_group_layouts = ArrayVec::new();
    for &id in desc.bind_group_layouts.iter() {
        let Ok(bgl) = bgl_registry.get(id) else {
            return Err(Error::InvalidBindGroupLayout(id));
        };

        bind_group_layouts.push(bgl);
    }

    // Validate total resource counts and check for a matching device
    for bgl in &bind_group_layouts
    {
        if bgl.device.as_info().id() != self.as_info().id() {
            return Err(DeviceError::WrongDevice.into());
        }

        count_validator.merge(&bgl.binding_count_validator);
    }

    count_validator
        .validate(&self.limits)
        .map_err(Error::TooManyBindings)?;

    let raw_bind_group_layouts = bind_group_layouts
        .iter()
        .map(|bgl| bgl.raw())
        .collect::<ArrayVec<_, { hal::MAX_BIND_GROUPS }>>();

    let hal_desc = hal::PipelineLayoutDescriptor {
        label: desc.label.to_hal(self.instance_flags),
        flags: hal::PipelineLayoutFlags::FIRST_VERTEX_INSTANCE,
        bind_group_layouts: &raw_bind_group_layouts,
        push_constant_ranges: desc.push_constant_ranges.as_ref(),
    };

    let raw = unsafe {
        self.raw
            .as_ref()
            .unwrap()
            .create_pipeline_layout(&hal_desc)
            .map_err(DeviceError::from)?
    };

    // End the borrow of `bind_group_layouts` before moving it into the
    // returned struct.
    drop(raw_bind_group_layouts);

    Ok(binding_model::PipelineLayout {
        raw: Some(raw),
        device: self.clone(),
        info: ResourceInfo::new(desc.label.borrow_or_default()),
        bind_group_layouts,
        push_constant_ranges: desc.push_constant_ranges.iter().cloned().collect(),
    })
}

//TODO: refactor this. It's the only method of `Device` that registers new objects
// (the pipeline layout).
/// Build an implicit pipeline layout from shader-derived bind group layout
/// entry maps, registering the new BGLs and the layout under the ids
/// pre-reserved in `implicit_context`.
pub(crate) fn derive_pipeline_layout(
    self: &Arc<Self>,
    implicit_context: Option<ImplicitPipelineContext>,
    mut derived_group_layouts: ArrayVec<bgl::EntryMap, { hal::MAX_BIND_GROUPS }>,
    bgl_registry: &Registry<BindGroupLayout<A>>,
    pipeline_layout_registry: &Registry<binding_model::PipelineLayout<A>>,
) -> Result<Arc<binding_model::PipelineLayout<A>>, pipeline::ImplicitLayoutError> {
    // Trim trailing empty groups so we don't create needless BGLs.
    while derived_group_layouts
        .last()
        .map_or(false, |map| map.is_empty())
    {
        derived_group_layouts.pop();
    }
    let mut ids = implicit_context.ok_or(pipeline::ImplicitLayoutError::MissingIds(0))?;
    let group_count = derived_group_layouts.len();
    if ids.group_ids.len() < group_count {
        log::error!(
            "Not enough bind group IDs ({}) specified for the implicit layout ({})",
            ids.group_ids.len(),
            derived_group_layouts.len()
        );
        return Err(pipeline::ImplicitLayoutError::MissingIds(group_count as _));
    }

    // Replace the pre-inserted error entries with the real derived BGLs.
    for (bgl_id, map) in ids.group_ids.iter_mut().zip(derived_group_layouts) {
        let bgl = self.create_bind_group_layout(&None, map, bgl::Origin::Derived)?;
        bgl_registry.force_replace(*bgl_id, bgl);
    }

    let layout_desc = binding_model::PipelineLayoutDescriptor {
        label: None,
        bind_group_layouts: Cow::Borrowed(&ids.group_ids[..group_count]),
        push_constant_ranges: Cow::Borrowed(&[]), //TODO?
    };
    let layout = self.create_pipeline_layout(&layout_desc, bgl_registry)?;
    pipeline_layout_registry.force_replace(ids.root_id, layout);
    Ok(pipeline_layout_registry.get(ids.root_id).unwrap())
}

/// Create a compute pipeline, deriving an implicit layout when the
/// descriptor does not provide one.
pub(crate) fn create_compute_pipeline(
    self: &Arc<Self>,
    desc: &pipeline::ComputePipelineDescriptor,
    implicit_context: Option<ImplicitPipelineContext>,
    hub: &Hub<A>,
) -> Result<pipeline::ComputePipeline<A>, pipeline::CreateComputePipelineError> {
    // This has to be done first, or otherwise the IDs may be pointing to entries
    // that are not even in the storage.
    // Pre-fill the reserved implicit ids with error entries so lookups
    // never dangle if creation fails part-way.
    if let Some(ref ids) = implicit_context {
        let mut pipeline_layout_guard = hub.pipeline_layouts.write();
        pipeline_layout_guard.insert_error(ids.root_id, IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL);
        let mut bgl_guard = hub.bind_group_layouts.write();
        for &bgl_id in ids.group_ids.iter() {
            bgl_guard.insert_error(bgl_id, IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL);
        }
    }

    self.require_downlevel_flags(wgt::DownlevelFlags::COMPUTE_SHADERS)?;

    let shader_module = hub
        .shader_modules
        .get(desc.stage.module)
        .map_err(|_| validation::StageError::InvalidModule)?;

    if shader_module.device.as_info().id() != self.as_info().id() {
        return Err(DeviceError::WrongDevice.into());
    }

    // Get the pipeline layout from the desc if it is provided.
    let pipeline_layout = match desc.layout {
        Some(pipeline_layout_id) => {
            let pipeline_layout = hub
                .pipeline_layouts
                .get(pipeline_layout_id)
                .map_err(|_| pipeline::CreateComputePipelineError::InvalidLayout)?;

            if pipeline_layout.device.as_info().id() != self.as_info().id() {
                return Err(DeviceError::WrongDevice.into());
            }

            Some(pipeline_layout)
        }
        None => None,
    };

    // Either validate against the provided layout's binding maps, or
    // derive binding maps from the shader itself.
    let mut binding_layout_source = match pipeline_layout {
        Some(ref pipeline_layout) => {
            validation::BindingLayoutSource::Provided(pipeline_layout.get_binding_maps())
        }
        None => validation::BindingLayoutSource::new_derived(&self.limits),
    };
    let mut shader_binding_sizes = FastHashMap::default();
    let io = validation::StageIo::default();

    {
        let stage = wgt::ShaderStages::COMPUTE;

        // Modules without reflection info (e.g. SPIR-V passthrough) skip
        // stage validation entirely.
        if let Some(ref interface) = shader_module.interface {
            let _ = interface.check_stage(
                &mut binding_layout_source,
                &mut shader_binding_sizes,
                &desc.stage.entry_point,
                stage,
                io,
                None,
            )?;
        }
    }

    let pipeline_layout = match binding_layout_source {
        validation::BindingLayoutSource::Provided(_) => {
            drop(binding_layout_source);
            pipeline_layout.unwrap()
        }
        validation::BindingLayoutSource::Derived(entries) => self.derive_pipeline_layout(
            implicit_context,
            entries,
            &hub.bind_group_layouts,
            &hub.pipeline_layouts,
        )?,
    };

    let late_sized_buffer_groups =
        Device::make_late_sized_buffer_groups(&shader_binding_sizes, &pipeline_layout);

    let pipeline_desc = hal::ComputePipelineDescriptor {
        label: desc.label.to_hal(self.instance_flags),
        layout: pipeline_layout.raw(),
        stage: hal::ProgrammableStage {
            entry_point: desc.stage.entry_point.as_ref(),
            module: shader_module.raw(),
        },
    };

    let raw = unsafe {
        self.raw
            .as_ref()
            .unwrap()
            .create_compute_pipeline(&pipeline_desc)
    }
    .map_err(|err| match err {
        hal::PipelineError::Device(error) => {
            pipeline::CreateComputePipelineError::Device(error.into())
        }
        hal::PipelineError::Linkage(_stages, msg) => {
            pipeline::CreateComputePipelineError::Internal(msg)
        }
        hal::PipelineError::EntryPoint(_stage) => {
            pipeline::CreateComputePipelineError::Internal(ENTRYPOINT_FAILURE_ERROR.to_string())
        }
    })?;

    let pipeline = pipeline::ComputePipeline {
        raw: Some(raw),
        layout: pipeline_layout,
        device: self.clone(),
        // Kept alive only so the module outlives the pipeline.
        _shader_module: shader_module,
        late_sized_buffer_groups,
        info: ResourceInfo::new(desc.label.borrow_or_default()),
    };
    Ok(pipeline)
}

/// Create a render pipeline, validating vertex/fragment state against the
/// device's features, limits, and downlevel flags.
pub(crate) fn create_render_pipeline(
    self: &Arc<Self>,
    adapter: &Adapter<A>,
    desc: &pipeline::RenderPipelineDescriptor,
    implicit_context: Option<ImplicitPipelineContext>,
    hub: &Hub<A>,
) -> Result<pipeline::RenderPipeline<A>, pipeline::CreateRenderPipelineError> {
    use wgt::TextureFormatFeatureFlags as Tfff;

    // This has to be done first, or otherwise the IDs may be pointing to entries
    // that are not even in the storage.
    // Pre-fill the reserved implicit ids with error entries so lookups
    // never dangle if creation fails part-way.
    if let Some(ref ids) = implicit_context {
        //TODO: only lock mutable if the layout is derived
        let mut pipeline_layout_guard = hub.pipeline_layouts.write();
        let mut bgl_guard = hub.bind_group_layouts.write();
        pipeline_layout_guard.insert_error(ids.root_id, IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL);
        for &bgl_id in ids.group_ids.iter() {
            bgl_guard.insert_error(bgl_id, IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL);
        }
    }

    let mut shader_binding_sizes = FastHashMap::default();

    let num_attachments = desc.fragment.as_ref().map(|f| f.targets.len()).unwrap_or(0);
    if num_attachments > hal::MAX_COLOR_ATTACHMENTS {
        return Err(pipeline::CreateRenderPipelineError::ColorAttachment(
            command::ColorAttachmentError::TooMany {
                given: num_attachments,
                limit: hal::MAX_COLOR_ATTACHMENTS,
            },
        ));
    }

    let color_targets = desc
        .fragment
        .as_ref()
        .map_or(&[][..], |fragment| &fragment.targets);
    let depth_stencil_state = desc.depth_stencil.as_ref();

    // If any two targets differ in blend state or write mask, the device
    // must support independent blending.
    let cts: ArrayVec<_, { hal::MAX_COLOR_ATTACHMENTS }> =
        color_targets.iter().filter_map(|x| x.as_ref()).collect();
    if !cts.is_empty() && {
        let first = &cts[0];
        cts[1..]
            .iter()
            .any(|ct| ct.write_mask != first.write_mask || ct.blend != first.blend)
    } {
        log::debug!("Color targets: {:?}", color_targets);
        self.require_downlevel_flags(wgt::DownlevelFlags::INDEPENDENT_BLEND)?;
    }

    let mut io = validation::StageIo::default();
    let mut validated_stages = wgt::ShaderStages::empty();

    let mut vertex_steps = Vec::with_capacity(desc.vertex.buffers.len());
    let mut vertex_buffers = Vec::with_capacity(desc.vertex.buffers.len());
    let mut total_attributes = 0;
    let mut shader_expects_dual_source_blending = false;
    let mut pipeline_expects_dual_source_blending = false;
    for (i, vb_state) in desc.vertex.buffers.iter().enumerate() {
        // `last_stride` is the furthest byte any attribute reaches into a
        // vertex of this buffer.
        let mut last_stride = 0;
        for attribute in vb_state.attributes.iter() {
            last_stride = last_stride.max(attribute.offset + attribute.format.size());
        }
        vertex_steps.push(pipeline::VertexStep {
            stride: vb_state.array_stride,
            last_stride,
            mode: vb_state.step_mode,
        });
        if vb_state.attributes.is_empty() {
            continue;
        }
        if vb_state.array_stride > self.limits.max_vertex_buffer_array_stride as u64 {
            return Err(pipeline::CreateRenderPipelineError::VertexStrideTooLarge {
                index: i as u32,
                given: vb_state.array_stride as u32,
                limit: self.limits.max_vertex_buffer_array_stride,
            });
        }
        if vb_state.array_stride % wgt::VERTEX_STRIDE_ALIGNMENT != 0 {
            return Err(pipeline::CreateRenderPipelineError::UnalignedVertexStride {
                index: i as u32,
                stride: vb_state.array_stride,
            });
        }
        vertex_buffers.push(hal::VertexBufferLayout {
            array_stride: vb_state.array_stride,
            step_mode: vb_state.step_mode,
            attributes: vb_state.attributes.as_ref(),
        });

        for attribute in vb_state.attributes.iter() {
            if attribute.offset >= 0x10000000 {
                return Err(
                    pipeline::CreateRenderPipelineError::InvalidVertexAttributeOffset {
                        location: attribute.shader_location,
                        offset: attribute.offset,
                    },
                );
            }

            if let wgt::VertexFormat::Float64
            | wgt::VertexFormat::Float64x2
            | wgt::VertexFormat::Float64x3
            | wgt::VertexFormat::Float64x4 = attribute.format
            {
                self.require_features(wgt::Features::VERTEX_ATTRIBUTE_64BIT)?;
            }

            // Each shader location may be fed by at most one attribute.
            let previous = io.insert(
                attribute.shader_location,
                validation::InterfaceVar::vertex_attribute(attribute.format),
            );

            if previous.is_some() {
                return Err(pipeline::CreateRenderPipelineError::ShaderLocationClash(
                    attribute.shader_location,
                ));
            }
        }
        total_attributes += vb_state.attributes.len();
    }

    if vertex_buffers.len() > self.limits.max_vertex_buffers as usize {
        return Err(pipeline::CreateRenderPipelineError::TooManyVertexBuffers {
            given: vertex_buffers.len() as u32,
            limit: self.limits.max_vertex_buffers,
        });
    }
    if total_attributes > self.limits.max_vertex_attributes as usize {
        return Err(
            pipeline::CreateRenderPipelineError::TooManyVertexAttributes {
                given: total_attributes as u32,
                limit: self.limits.max_vertex_attributes,
            },
        );
    }

    if desc.primitive.strip_index_format.is_some() && !desc.primitive.topology.is_strip() {
        return Err(
            pipeline::CreateRenderPipelineError::StripIndexFormatForNonStripTopology {
                strip_index_format: desc.primitive.strip_index_format,
                topology: desc.primitive.topology,
            },
        );
    }

    if desc.primitive.unclipped_depth {
        self.require_features(wgt::Features::DEPTH_CLIP_CONTROL)?;
    }

    if desc.primitive.polygon_mode == wgt::PolygonMode::Line {
        self.require_features(wgt::Features::POLYGON_MODE_LINE)?;
    }
    if desc.primitive.polygon_mode == wgt::PolygonMode::Point {
        self.require_features(wgt::Features::POLYGON_MODE_POINT)?;
    }

    if desc.primitive.conservative {
        self.require_features(wgt::Features::CONSERVATIVE_RASTERIZATION)?;
    }

    if desc.primitive.conservative && desc.primitive.polygon_mode != wgt::PolygonMode::Fill {
        return Err(
            pipeline::CreateRenderPipelineError::ConservativeRasterizationNonFillPolygonMode,
        );
    }

    for (i, cs) in color_targets.iter().enumerate() {
        if let Some(cs) = cs.as_ref() {
+ let error = loop { + if cs.write_mask.contains_invalid_bits() { + break Some(pipeline::ColorStateError::InvalidWriteMask(cs.write_mask)); + } + + let format_features = self.describe_format_features(adapter, cs.format)?; + if !format_features + .allowed_usages + .contains(wgt::TextureUsages::RENDER_ATTACHMENT) + { + break Some(pipeline::ColorStateError::FormatNotRenderable(cs.format)); + } + let blendable = format_features.flags.contains(Tfff::BLENDABLE); + let filterable = format_features.flags.contains(Tfff::FILTERABLE); + let adapter_specific = self + .features + .contains(wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES); + // according to WebGPU specifications the texture needs to be + // [`TextureFormatFeatureFlags::FILTERABLE`] if blending is set - use + // [`Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES`] to elude + // this limitation + if cs.blend.is_some() && (!blendable || (!filterable && !adapter_specific)) { + break Some(pipeline::ColorStateError::FormatNotBlendable(cs.format)); + } + if !hal::FormatAspects::from(cs.format).contains(hal::FormatAspects::COLOR) { + break Some(pipeline::ColorStateError::FormatNotColor(cs.format)); + } + if desc.multisample.count > 1 + && !format_features + .flags + .sample_count_supported(desc.multisample.count) + { + break Some(pipeline::ColorStateError::InvalidSampleCount( + desc.multisample.count, + cs.format, + cs.format + .guaranteed_format_features(self.features) + .flags + .supported_sample_counts(), + adapter + .get_texture_format_features(cs.format) + .flags + .supported_sample_counts(), + )); + } + if let Some(blend_mode) = cs.blend { + for factor in [ + blend_mode.color.src_factor, + blend_mode.color.dst_factor, + blend_mode.alpha.src_factor, + blend_mode.alpha.dst_factor, + ] { + if factor.ref_second_blend_source() { + self.require_features(wgt::Features::DUAL_SOURCE_BLENDING)?; + if i == 0 { + pipeline_expects_dual_source_blending = true; + break; + } else { + return 
Err(crate::pipeline::CreateRenderPipelineError + ::BlendFactorOnUnsupportedTarget { factor, target: i as u32 }); + } + } + } + } + break None; + }; + if let Some(e) = error { + return Err(pipeline::CreateRenderPipelineError::ColorState(i as u8, e)); + } + } + } + + if let Some(ds) = depth_stencil_state { + let error = loop { + let format_features = self.describe_format_features(adapter, ds.format)?; + if !format_features + .allowed_usages + .contains(wgt::TextureUsages::RENDER_ATTACHMENT) + { + break Some(pipeline::DepthStencilStateError::FormatNotRenderable( + ds.format, + )); + } + + let aspect = hal::FormatAspects::from(ds.format); + if ds.is_depth_enabled() && !aspect.contains(hal::FormatAspects::DEPTH) { + break Some(pipeline::DepthStencilStateError::FormatNotDepth(ds.format)); + } + if ds.stencil.is_enabled() && !aspect.contains(hal::FormatAspects::STENCIL) { + break Some(pipeline::DepthStencilStateError::FormatNotStencil( + ds.format, + )); + } + if desc.multisample.count > 1 + && !format_features + .flags + .sample_count_supported(desc.multisample.count) + { + break Some(pipeline::DepthStencilStateError::InvalidSampleCount( + desc.multisample.count, + ds.format, + ds.format + .guaranteed_format_features(self.features) + .flags + .supported_sample_counts(), + adapter + .get_texture_format_features(ds.format) + .flags + .supported_sample_counts(), + )); + } + + break None; + }; + if let Some(e) = error { + return Err(pipeline::CreateRenderPipelineError::DepthStencilState(e)); + } + + if ds.bias.clamp != 0.0 { + self.require_downlevel_flags(wgt::DownlevelFlags::DEPTH_BIAS_CLAMP)?; + } + } + + // Get the pipeline layout from the desc if it is provided. 
+ let pipeline_layout = match desc.layout { + Some(pipeline_layout_id) => { + let pipeline_layout = hub + .pipeline_layouts + .get(pipeline_layout_id) + .map_err(|_| pipeline::CreateRenderPipelineError::InvalidLayout)?; + + if pipeline_layout.device.as_info().id() != self.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + + Some(pipeline_layout) + } + None => None, + }; + + let mut binding_layout_source = match pipeline_layout { + Some(ref pipeline_layout) => { + validation::BindingLayoutSource::Provided(pipeline_layout.get_binding_maps()) + } + None => validation::BindingLayoutSource::new_derived(&self.limits), + }; + + let samples = { + let sc = desc.multisample.count; + if sc == 0 || sc > 32 || !conv::is_power_of_two_u32(sc) { + return Err(pipeline::CreateRenderPipelineError::InvalidSampleCount(sc)); + } + sc + }; + + let vertex_shader_module; + let vertex_stage = { + let stage_desc = &desc.vertex.stage; + let stage = wgt::ShaderStages::VERTEX; + + vertex_shader_module = hub.shader_modules.get(stage_desc.module).map_err(|_| { + pipeline::CreateRenderPipelineError::Stage { + stage, + error: validation::StageError::InvalidModule, + } + })?; + if vertex_shader_module.device.as_info().id() != self.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + + if let Some(ref interface) = vertex_shader_module.interface { + io = interface + .check_stage( + &mut binding_layout_source, + &mut shader_binding_sizes, + &stage_desc.entry_point, + stage, + io, + desc.depth_stencil.as_ref().map(|d| d.depth_compare), + ) + .map_err(|error| pipeline::CreateRenderPipelineError::Stage { stage, error })?; + validated_stages |= stage; + } + + hal::ProgrammableStage { + module: vertex_shader_module.raw(), + entry_point: stage_desc.entry_point.as_ref(), + } + }; + + let mut fragment_shader_module = None; + let fragment_stage = match desc.fragment { + Some(ref fragment_state) => { + let stage = wgt::ShaderStages::FRAGMENT; + + let shader_module = 
fragment_shader_module.insert( + hub.shader_modules + .get(fragment_state.stage.module) + .map_err(|_| pipeline::CreateRenderPipelineError::Stage { + stage, + error: validation::StageError::InvalidModule, + })?, + ); + + if validated_stages == wgt::ShaderStages::VERTEX { + if let Some(ref interface) = shader_module.interface { + io = interface + .check_stage( + &mut binding_layout_source, + &mut shader_binding_sizes, + &fragment_state.stage.entry_point, + stage, + io, + desc.depth_stencil.as_ref().map(|d| d.depth_compare), + ) + .map_err(|error| pipeline::CreateRenderPipelineError::Stage { + stage, + error, + })?; + validated_stages |= stage; + } + } + + if let Some(ref interface) = shader_module.interface { + shader_expects_dual_source_blending = interface + .fragment_uses_dual_source_blending(&fragment_state.stage.entry_point) + .map_err(|error| pipeline::CreateRenderPipelineError::Stage { + stage, + error, + })?; + } + + Some(hal::ProgrammableStage { + module: shader_module.raw(), + entry_point: fragment_state.stage.entry_point.as_ref(), + }) + } + None => None, + }; + + if !pipeline_expects_dual_source_blending && shader_expects_dual_source_blending { + return Err( + pipeline::CreateRenderPipelineError::ShaderExpectsPipelineToUseDualSourceBlending, + ); + } + if pipeline_expects_dual_source_blending && !shader_expects_dual_source_blending { + return Err( + pipeline::CreateRenderPipelineError::PipelineExpectsShaderToUseDualSourceBlending, + ); + } + + if validated_stages.contains(wgt::ShaderStages::FRAGMENT) { + for (i, output) in io.iter() { + match color_targets.get(*i as usize) { + Some(Some(state)) => { + validation::check_texture_format(state.format, &output.ty).map_err( + |pipeline| { + pipeline::CreateRenderPipelineError::ColorState( + *i as u8, + pipeline::ColorStateError::IncompatibleFormat { + pipeline, + shader: output.ty, + }, + ) + }, + )?; + } + _ => { + log::warn!( + "The fragment stage {:?} output @location({}) values are ignored", + 
fragment_stage + .as_ref() + .map_or("", |stage| stage.entry_point), + i + ); + } + } + } + } + let last_stage = match desc.fragment { + Some(_) => wgt::ShaderStages::FRAGMENT, + None => wgt::ShaderStages::VERTEX, + }; + if desc.layout.is_none() && !validated_stages.contains(last_stage) { + return Err(pipeline::ImplicitLayoutError::ReflectionError(last_stage).into()); + } + + let pipeline_layout = match binding_layout_source { + validation::BindingLayoutSource::Provided(_) => { + drop(binding_layout_source); + pipeline_layout.unwrap() + } + validation::BindingLayoutSource::Derived(entries) => self.derive_pipeline_layout( + implicit_context, + entries, + &hub.bind_group_layouts, + &hub.pipeline_layouts, + )?, + }; + + // Multiview is only supported if the feature is enabled + if desc.multiview.is_some() { + self.require_features(wgt::Features::MULTIVIEW)?; + } + + if !self + .downlevel + .flags + .contains(wgt::DownlevelFlags::BUFFER_BINDINGS_NOT_16_BYTE_ALIGNED) + { + for (binding, size) in shader_binding_sizes.iter() { + if size.get() % 16 != 0 { + return Err(pipeline::CreateRenderPipelineError::UnalignedShader { + binding: binding.binding, + group: binding.group, + size: size.get(), + }); + } + } + } + + let late_sized_buffer_groups = + Device::make_late_sized_buffer_groups(&shader_binding_sizes, &pipeline_layout); + + let pipeline_desc = hal::RenderPipelineDescriptor { + label: desc.label.to_hal(self.instance_flags), + layout: pipeline_layout.raw(), + vertex_buffers: &vertex_buffers, + vertex_stage, + primitive: desc.primitive, + depth_stencil: desc.depth_stencil.clone(), + multisample: desc.multisample, + fragment_stage, + color_targets, + multiview: desc.multiview, + }; + let raw = unsafe { + self.raw + .as_ref() + .unwrap() + .create_render_pipeline(&pipeline_desc) + } + .map_err(|err| match err { + hal::PipelineError::Device(error) => { + pipeline::CreateRenderPipelineError::Device(error.into()) + } + hal::PipelineError::Linkage(stage, msg) => { + 
pipeline::CreateRenderPipelineError::Internal { stage, error: msg } + } + hal::PipelineError::EntryPoint(stage) => { + pipeline::CreateRenderPipelineError::Internal { + stage: hal::auxil::map_naga_stage(stage), + error: ENTRYPOINT_FAILURE_ERROR.to_string(), + } + } + })?; + + let pass_context = RenderPassContext { + attachments: AttachmentData { + colors: color_targets + .iter() + .map(|state| state.as_ref().map(|s| s.format)) + .collect(), + resolves: ArrayVec::new(), + depth_stencil: depth_stencil_state.as_ref().map(|state| state.format), + }, + sample_count: samples, + multiview: desc.multiview, + }; + + let mut flags = pipeline::PipelineFlags::empty(); + for state in color_targets.iter().filter_map(|s| s.as_ref()) { + if let Some(ref bs) = state.blend { + if bs.color.uses_constant() | bs.alpha.uses_constant() { + flags |= pipeline::PipelineFlags::BLEND_CONSTANT; + } + } + } + if let Some(ds) = depth_stencil_state.as_ref() { + if ds.stencil.is_enabled() && ds.stencil.needs_ref_value() { + flags |= pipeline::PipelineFlags::STENCIL_REFERENCE; + } + if !ds.is_depth_read_only() { + flags |= pipeline::PipelineFlags::WRITES_DEPTH; + } + if !ds.is_stencil_read_only(desc.primitive.cull_mode) { + flags |= pipeline::PipelineFlags::WRITES_STENCIL; + } + } + + let shader_modules = { + let mut shader_modules = ArrayVec::new(); + shader_modules.push(vertex_shader_module); + shader_modules.extend(fragment_shader_module); + shader_modules + }; + + let pipeline = pipeline::RenderPipeline { + raw: Some(raw), + layout: pipeline_layout, + device: self.clone(), + pass_context, + _shader_modules: shader_modules, + flags, + strip_index_format: desc.primitive.strip_index_format, + vertex_steps, + late_sized_buffer_groups, + info: ResourceInfo::new(desc.label.borrow_or_default()), + }; + Ok(pipeline) + } + + pub(crate) fn get_texture_format_features( + &self, + adapter: &Adapter<A>, + format: TextureFormat, + ) -> wgt::TextureFormatFeatures { + // Variant of 
adapter.get_texture_format_features that takes device features into account + use wgt::TextureFormatFeatureFlags as tfsc; + let mut format_features = adapter.get_texture_format_features(format); + if (format == TextureFormat::R32Float + || format == TextureFormat::Rg32Float + || format == TextureFormat::Rgba32Float) + && !self.features.contains(wgt::Features::FLOAT32_FILTERABLE) + { + format_features.flags.set(tfsc::FILTERABLE, false); + } + format_features + } + + pub(crate) fn describe_format_features( + &self, + adapter: &Adapter<A>, + format: TextureFormat, + ) -> Result<wgt::TextureFormatFeatures, MissingFeatures> { + self.require_features(format.required_features())?; + + let using_device_features = self + .features + .contains(wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES); + // If we're running downlevel, we need to manually ask the backend what + // we can use as we can't trust WebGPU. + let downlevel = !self + .downlevel + .flags + .contains(wgt::DownlevelFlags::WEBGPU_TEXTURE_FORMAT_SUPPORT); + + if using_device_features || downlevel { + Ok(self.get_texture_format_features(adapter, format)) + } else { + Ok(format.guaranteed_format_features(self.features)) + } + } + + pub(crate) fn wait_for_submit( + &self, + submission_index: SubmissionIndex, + ) -> Result<(), WaitIdleError> { + let guard = self.fence.read(); + let fence = guard.as_ref().unwrap(); + let last_done_index = unsafe { + self.raw + .as_ref() + .unwrap() + .get_fence_value(fence) + .map_err(DeviceError::from)? + }; + if last_done_index < submission_index { + log::info!("Waiting for submission {:?}", submission_index); + unsafe { + self.raw + .as_ref() + .unwrap() + .wait(fence, submission_index, !0) + .map_err(DeviceError::from)? 
+ }; + drop(guard); + let closures = self.lock_life().triage_submissions( + submission_index, + self.command_allocator.lock().as_mut().unwrap(), + ); + assert!( + closures.is_empty(), + "wait_for_submit is not expected to work with closures" + ); + } + Ok(()) + } + + pub(crate) fn create_query_set( + self: &Arc<Self>, + desc: &resource::QuerySetDescriptor, + ) -> Result<QuerySet<A>, resource::CreateQuerySetError> { + use resource::CreateQuerySetError as Error; + + match desc.ty { + wgt::QueryType::Occlusion => {} + wgt::QueryType::Timestamp => { + self.require_features(wgt::Features::TIMESTAMP_QUERY)?; + } + wgt::QueryType::PipelineStatistics(..) => { + self.require_features(wgt::Features::PIPELINE_STATISTICS_QUERY)?; + } + } + + if desc.count == 0 { + return Err(Error::ZeroCount); + } + + if desc.count > wgt::QUERY_SET_MAX_QUERIES { + return Err(Error::TooManyQueries { + count: desc.count, + maximum: wgt::QUERY_SET_MAX_QUERIES, + }); + } + + let hal_desc = desc.map_label(|label| label.to_hal(self.instance_flags)); + Ok(QuerySet { + raw: Some(unsafe { self.raw().create_query_set(&hal_desc).unwrap() }), + device: self.clone(), + info: ResourceInfo::new(""), + desc: desc.map_label(|_| ()), + }) + } + + pub(crate) fn lose(&self, message: &str) { + // Follow the steps at https://gpuweb.github.io/gpuweb/#lose-the-device. + + // Mark the device explicitly as invalid. This is checked in various + // places to prevent new work from being submitted. + self.valid.store(false, Ordering::Release); + + // 1) Resolve the GPUDevice device.lost promise. + let mut life_lock = self.lock_life(); + let closure = life_lock.device_lost_closure.take(); + // It's important to not hold the lock while calling the closure and while calling + // release_gpu_resources which may take the lock again. 
+ drop(life_lock); + + if let Some(device_lost_closure) = closure { + device_lost_closure.call(DeviceLostReason::Unknown, message.to_string()); + } + + // 2) Complete any outstanding mapAsync() steps. + // 3) Complete any outstanding onSubmittedWorkDone() steps. + + // These parts are passively accomplished by setting valid to false, + // since that will prevent any new work from being added to the queues. + // Future calls to poll_devices will continue to check the work queues + // until they are cleared, and then drop the device. + + // Eagerly release GPU resources. + self.release_gpu_resources(); + } + + pub(crate) fn release_gpu_resources(&self) { + // This is called when the device is lost, which makes every associated + // resource invalid and unusable. This is an opportunity to release all of + // the underlying gpu resources, even though the objects remain visible to + // the user agent. We purge this memory naturally when resources have been + // moved into the appropriate buckets, so this function just needs to + // initiate movement into those buckets, and it can do that by calling + // "destroy" on all the resources we know about. + + // During these iterations, we discard all errors. We don't care! + let trackers = self.trackers.lock(); + for buffer in trackers.buffers.used_resources() { + let _ = buffer.destroy(); + } + for texture in trackers.textures.used_resources() { + let _ = texture.destroy(); + } + } +} + +impl<A: HalApi> Device<A> { + pub(crate) fn destroy_command_buffer(&self, mut cmd_buf: command::CommandBuffer<A>) { + let mut baked = cmd_buf.extract_baked_commands(); + unsafe { + baked.encoder.reset_all(baked.list.into_iter()); + } + unsafe { + self.raw + .as_ref() + .unwrap() + .destroy_command_encoder(baked.encoder); + } + } + + /// Wait for idle and remove resources that we can, before we die. 
+ pub(crate) fn prepare_to_die(&self) { + self.pending_writes.lock().as_mut().unwrap().deactivate(); + let current_index = self.active_submission_index.load(Ordering::Relaxed); + if let Err(error) = unsafe { + let fence = self.fence.read(); + let fence = fence.as_ref().unwrap(); + self.raw + .as_ref() + .unwrap() + .wait(fence, current_index, CLEANUP_WAIT_MS) + } { + log::error!("failed to wait for the device: {error}"); + } + let mut life_tracker = self.lock_life(); + let _ = life_tracker.triage_submissions( + current_index, + self.command_allocator.lock().as_mut().unwrap(), + ); + if let Some(device_lost_closure) = life_tracker.device_lost_closure.take() { + // It's important to not hold the lock while calling the closure. + drop(life_tracker); + device_lost_closure.call(DeviceLostReason::Dropped, "Device is dying.".to_string()); + } + #[cfg(feature = "trace")] + { + *self.trace.lock() = None; + } + } +} + +impl<A: HalApi> Resource for Device<A> { + const TYPE: ResourceType = "Device"; + + type Marker = crate::id::markers::Device; + + fn as_info(&self) -> &ResourceInfo<Self> { + &self.info + } + + fn as_info_mut(&mut self) -> &mut ResourceInfo<Self> { + &mut self.info + } +} diff --git a/third_party/rust/wgpu-core/src/device/trace.rs b/third_party/rust/wgpu-core/src/device/trace.rs new file mode 100644 index 0000000000..0802b610d8 --- /dev/null +++ b/third_party/rust/wgpu-core/src/device/trace.rs @@ -0,0 +1,235 @@ +use crate::id; +use std::ops::Range; +#[cfg(feature = "trace")] +use std::{borrow::Cow, io::Write as _}; + +//TODO: consider a readable Id that doesn't include the backend + +type FileName = String; + +pub const FILE_NAME: &str = "trace.ron"; + +#[cfg(feature = "trace")] +pub(crate) fn new_render_bundle_encoder_descriptor<'a>( + label: crate::Label<'a>, + context: &'a super::RenderPassContext, + depth_read_only: bool, + stencil_read_only: bool, +) -> crate::command::RenderBundleEncoderDescriptor<'a> { + crate::command::RenderBundleEncoderDescriptor { + 
label, + color_formats: Cow::Borrowed(&context.attachments.colors), + depth_stencil: context.attachments.depth_stencil.map(|format| { + wgt::RenderBundleDepthStencil { + format, + depth_read_only, + stencil_read_only, + } + }), + sample_count: context.sample_count, + multiview: context.multiview, + } +} + +#[allow(clippy::large_enum_variant)] +#[derive(Debug)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum Action<'a> { + Init { + desc: crate::device::DeviceDescriptor<'a>, + backend: wgt::Backend, + }, + ConfigureSurface( + id::SurfaceId, + wgt::SurfaceConfiguration<Vec<wgt::TextureFormat>>, + ), + CreateBuffer(id::BufferId, crate::resource::BufferDescriptor<'a>), + FreeBuffer(id::BufferId), + DestroyBuffer(id::BufferId), + CreateTexture(id::TextureId, crate::resource::TextureDescriptor<'a>), + FreeTexture(id::TextureId), + DestroyTexture(id::TextureId), + CreateTextureView { + id: id::TextureViewId, + parent_id: id::TextureId, + desc: crate::resource::TextureViewDescriptor<'a>, + }, + DestroyTextureView(id::TextureViewId), + CreateSampler(id::SamplerId, crate::resource::SamplerDescriptor<'a>), + DestroySampler(id::SamplerId), + GetSurfaceTexture { + id: id::TextureId, + parent_id: id::SurfaceId, + }, + Present(id::SurfaceId), + DiscardSurfaceTexture(id::SurfaceId), + CreateBindGroupLayout( + id::BindGroupLayoutId, + crate::binding_model::BindGroupLayoutDescriptor<'a>, + ), + DestroyBindGroupLayout(id::BindGroupLayoutId), + CreatePipelineLayout( + id::PipelineLayoutId, + crate::binding_model::PipelineLayoutDescriptor<'a>, + ), + DestroyPipelineLayout(id::PipelineLayoutId), + CreateBindGroup( + id::BindGroupId, + crate::binding_model::BindGroupDescriptor<'a>, + ), + DestroyBindGroup(id::BindGroupId), + CreateShaderModule { + id: id::ShaderModuleId, + desc: crate::pipeline::ShaderModuleDescriptor<'a>, + data: FileName, + }, + DestroyShaderModule(id::ShaderModuleId), + CreateComputePipeline { + id: id::ComputePipelineId, + desc: 
crate::pipeline::ComputePipelineDescriptor<'a>, + #[cfg_attr(feature = "replay", serde(default))] + implicit_context: Option<super::ImplicitPipelineContext>, + }, + DestroyComputePipeline(id::ComputePipelineId), + CreateRenderPipeline { + id: id::RenderPipelineId, + desc: crate::pipeline::RenderPipelineDescriptor<'a>, + #[cfg_attr(feature = "replay", serde(default))] + implicit_context: Option<super::ImplicitPipelineContext>, + }, + DestroyRenderPipeline(id::RenderPipelineId), + CreateRenderBundle { + id: id::RenderBundleId, + desc: crate::command::RenderBundleEncoderDescriptor<'a>, + base: crate::command::BasePass<crate::command::RenderCommand>, + }, + DestroyRenderBundle(id::RenderBundleId), + CreateQuerySet { + id: id::QuerySetId, + desc: crate::resource::QuerySetDescriptor<'a>, + }, + DestroyQuerySet(id::QuerySetId), + WriteBuffer { + id: id::BufferId, + data: FileName, + range: Range<wgt::BufferAddress>, + queued: bool, + }, + WriteTexture { + to: crate::command::ImageCopyTexture, + data: FileName, + layout: wgt::ImageDataLayout, + size: wgt::Extent3d, + }, + Submit(crate::SubmissionIndex, Vec<Command>), +} + +#[derive(Debug)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum Command { + CopyBufferToBuffer { + src: id::BufferId, + src_offset: wgt::BufferAddress, + dst: id::BufferId, + dst_offset: wgt::BufferAddress, + size: wgt::BufferAddress, + }, + CopyBufferToTexture { + src: crate::command::ImageCopyBuffer, + dst: crate::command::ImageCopyTexture, + size: wgt::Extent3d, + }, + CopyTextureToBuffer { + src: crate::command::ImageCopyTexture, + dst: crate::command::ImageCopyBuffer, + size: wgt::Extent3d, + }, + CopyTextureToTexture { + src: crate::command::ImageCopyTexture, + dst: crate::command::ImageCopyTexture, + size: wgt::Extent3d, + }, + ClearBuffer { + dst: id::BufferId, + offset: wgt::BufferAddress, + size: Option<wgt::BufferAddress>, + }, + ClearTexture { + dst: id::TextureId, + subresource_range: 
wgt::ImageSubresourceRange, + }, + WriteTimestamp { + query_set_id: id::QuerySetId, + query_index: u32, + }, + ResolveQuerySet { + query_set_id: id::QuerySetId, + start_query: u32, + query_count: u32, + destination: id::BufferId, + destination_offset: wgt::BufferAddress, + }, + PushDebugGroup(String), + PopDebugGroup, + InsertDebugMarker(String), + RunComputePass { + base: crate::command::BasePass<crate::command::ComputeCommand>, + timestamp_writes: Option<crate::command::ComputePassTimestampWrites>, + }, + RunRenderPass { + base: crate::command::BasePass<crate::command::RenderCommand>, + target_colors: Vec<Option<crate::command::RenderPassColorAttachment>>, + target_depth_stencil: Option<crate::command::RenderPassDepthStencilAttachment>, + timestamp_writes: Option<crate::command::RenderPassTimestampWrites>, + occlusion_query_set_id: Option<id::QuerySetId>, + }, +} + +#[cfg(feature = "trace")] +#[derive(Debug)] +pub struct Trace { + path: std::path::PathBuf, + file: std::fs::File, + config: ron::ser::PrettyConfig, + binary_id: usize, +} + +#[cfg(feature = "trace")] +impl Trace { + pub fn new(path: &std::path::Path) -> Result<Self, std::io::Error> { + log::info!("Tracing into '{:?}'", path); + let mut file = std::fs::File::create(path.join(FILE_NAME))?; + file.write_all(b"[\n")?; + Ok(Self { + path: path.to_path_buf(), + file, + config: ron::ser::PrettyConfig::default(), + binary_id: 0, + }) + } + + pub fn make_binary(&mut self, kind: &str, data: &[u8]) -> String { + self.binary_id += 1; + let name = format!("data{}.{}", self.binary_id, kind); + let _ = std::fs::write(self.path.join(&name), data); + name + } + + pub(crate) fn add(&mut self, action: Action) { + match ron::ser::to_string_pretty(&action, self.config.clone()) { + Ok(string) => { + let _ = writeln!(self.file, "{},", string); + } + Err(e) => { + log::warn!("RON serialization failure: {:?}", e); + } + } + } +} + +#[cfg(feature = "trace")] +impl Drop for Trace { + fn drop(&mut self) { + let _ = 
self.file.write_all(b"]"); + } +} diff --git a/third_party/rust/wgpu-core/src/error.rs b/third_party/rust/wgpu-core/src/error.rs new file mode 100644 index 0000000000..91b46261ae --- /dev/null +++ b/third_party/rust/wgpu-core/src/error.rs @@ -0,0 +1,180 @@ +use core::fmt; +use std::error::Error; + +use crate::{gfx_select, global::Global}; + +pub struct ErrorFormatter<'a> { + writer: &'a mut dyn fmt::Write, + global: &'a Global, +} + +impl<'a> ErrorFormatter<'a> { + pub fn error(&mut self, err: &dyn Error) { + writeln!(self.writer, " {err}").expect("Error formatting error"); + } + + pub fn note(&mut self, note: &dyn fmt::Display) { + writeln!(self.writer, " note: {note}").expect("Error formatting error"); + } + + pub fn label(&mut self, label_key: &str, label_value: &String) { + if !label_key.is_empty() && !label_value.is_empty() { + self.note(&format!("{label_key} = `{label_value}`")); + } + } + + pub fn bind_group_label(&mut self, id: &crate::id::BindGroupId) { + let label: String = gfx_select!(id => self.global.bind_group_label(*id)); + self.label("bind group", &label); + } + + pub fn bind_group_layout_label(&mut self, id: &crate::id::BindGroupLayoutId) { + let label: String = gfx_select!(id => self.global.bind_group_layout_label(*id)); + self.label("bind group layout", &label); + } + + pub fn render_pipeline_label(&mut self, id: &crate::id::RenderPipelineId) { + let label: String = gfx_select!(id => self.global.render_pipeline_label(*id)); + self.label("render pipeline", &label); + } + + pub fn compute_pipeline_label(&mut self, id: &crate::id::ComputePipelineId) { + let label: String = gfx_select!(id => self.global.compute_pipeline_label(*id)); + self.label("compute pipeline", &label); + } + + pub fn buffer_label_with_key(&mut self, id: &crate::id::BufferId, key: &str) { + let label: String = gfx_select!(id => self.global.buffer_label(*id)); + self.label(key, &label); + } + + pub fn buffer_label(&mut self, id: &crate::id::BufferId) { + 
self.buffer_label_with_key(id, "buffer"); + } + + pub fn texture_label_with_key(&mut self, id: &crate::id::TextureId, key: &str) { + let label: String = gfx_select!(id => self.global.texture_label(*id)); + self.label(key, &label); + } + + pub fn texture_label(&mut self, id: &crate::id::TextureId) { + self.texture_label_with_key(id, "texture"); + } + + pub fn texture_view_label_with_key(&mut self, id: &crate::id::TextureViewId, key: &str) { + let label: String = gfx_select!(id => self.global.texture_view_label(*id)); + self.label(key, &label); + } + + pub fn texture_view_label(&mut self, id: &crate::id::TextureViewId) { + self.texture_view_label_with_key(id, "texture view"); + } + + pub fn sampler_label(&mut self, id: &crate::id::SamplerId) { + let label: String = gfx_select!(id => self.global.sampler_label(*id)); + self.label("sampler", &label); + } + + pub fn command_buffer_label(&mut self, id: &crate::id::CommandBufferId) { + let label: String = gfx_select!(id => self.global.command_buffer_label(*id)); + self.label("command buffer", &label); + } + + pub fn query_set_label(&mut self, id: &crate::id::QuerySetId) { + let label: String = gfx_select!(id => self.global.query_set_label(*id)); + self.label("query set", &label); + } +} + +pub trait PrettyError: Error + Sized { + fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { + fmt.error(self); + } +} + +pub fn format_pretty_any( + writer: &mut dyn fmt::Write, + global: &Global, + error: &(dyn Error + 'static), +) { + let mut fmt = ErrorFormatter { writer, global }; + + if let Some(pretty_err) = error.downcast_ref::<ContextError>() { + return pretty_err.fmt_pretty(&mut fmt); + } + + if let Some(pretty_err) = error.downcast_ref::<crate::command::RenderCommandError>() { + return pretty_err.fmt_pretty(&mut fmt); + } + if let Some(pretty_err) = error.downcast_ref::<crate::binding_model::CreateBindGroupError>() { + return pretty_err.fmt_pretty(&mut fmt); + } + if let Some(pretty_err) = + 
error.downcast_ref::<crate::binding_model::CreatePipelineLayoutError>() + { + return pretty_err.fmt_pretty(&mut fmt); + } + if let Some(pretty_err) = error.downcast_ref::<crate::command::ExecutionError>() { + return pretty_err.fmt_pretty(&mut fmt); + } + if let Some(pretty_err) = error.downcast_ref::<crate::command::RenderPassErrorInner>() { + return pretty_err.fmt_pretty(&mut fmt); + } + if let Some(pretty_err) = error.downcast_ref::<crate::command::RenderPassError>() { + return pretty_err.fmt_pretty(&mut fmt); + } + if let Some(pretty_err) = error.downcast_ref::<crate::command::ComputePassErrorInner>() { + return pretty_err.fmt_pretty(&mut fmt); + } + if let Some(pretty_err) = error.downcast_ref::<crate::command::ComputePassError>() { + return pretty_err.fmt_pretty(&mut fmt); + } + if let Some(pretty_err) = error.downcast_ref::<crate::command::RenderBundleError>() { + return pretty_err.fmt_pretty(&mut fmt); + } + if let Some(pretty_err) = error.downcast_ref::<crate::command::TransferError>() { + return pretty_err.fmt_pretty(&mut fmt); + } + if let Some(pretty_err) = error.downcast_ref::<crate::command::PassErrorScope>() { + return pretty_err.fmt_pretty(&mut fmt); + } + if let Some(pretty_err) = error.downcast_ref::<crate::track::UsageConflict>() { + return pretty_err.fmt_pretty(&mut fmt); + } + if let Some(pretty_err) = error.downcast_ref::<crate::command::QueryError>() { + return pretty_err.fmt_pretty(&mut fmt); + } + + // default + fmt.error(error) +} + +#[derive(Debug)] +pub struct ContextError { + pub string: &'static str, + #[cfg(send_sync)] + pub cause: Box<dyn Error + Send + Sync + 'static>, + #[cfg(not(send_sync))] + pub cause: Box<dyn Error + 'static>, + pub label_key: &'static str, + pub label: String, +} + +impl PrettyError for ContextError { + fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { + fmt.error(self); + fmt.label(self.label_key, &self.label); + } +} + +impl fmt::Display for ContextError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> 
fmt::Result { + write!(f, "In {}", self.string) + } +} + +impl Error for ContextError { + fn source(&self) -> Option<&(dyn Error + 'static)> { + Some(self.cause.as_ref()) + } +} diff --git a/third_party/rust/wgpu-core/src/global.rs b/third_party/rust/wgpu-core/src/global.rs new file mode 100644 index 0000000000..9a8c43bf33 --- /dev/null +++ b/third_party/rust/wgpu-core/src/global.rs @@ -0,0 +1,172 @@ +use std::sync::Arc; + +use wgt::Backend; + +use crate::{ + hal_api::HalApi, + hub::{HubReport, Hubs}, + instance::{Instance, Surface}, + registry::{Registry, RegistryReport}, + resource_log, + storage::Element, +}; + +#[derive(Debug, PartialEq, Eq)] +pub struct GlobalReport { + pub surfaces: RegistryReport, + #[cfg(vulkan)] + pub vulkan: Option<HubReport>, + #[cfg(metal)] + pub metal: Option<HubReport>, + #[cfg(dx12)] + pub dx12: Option<HubReport>, + #[cfg(gles)] + pub gl: Option<HubReport>, +} + +impl GlobalReport { + pub fn surfaces(&self) -> &RegistryReport { + &self.surfaces + } + pub fn hub_report(&self, backend: Backend) -> &HubReport { + match backend { + #[cfg(vulkan)] + Backend::Vulkan => self.vulkan.as_ref().unwrap(), + #[cfg(metal)] + Backend::Metal => self.metal.as_ref().unwrap(), + #[cfg(dx12)] + Backend::Dx12 => self.dx12.as_ref().unwrap(), + #[cfg(gles)] + Backend::Gl => self.gl.as_ref().unwrap(), + _ => panic!("HubReport is not supported on this backend"), + } + } +} + +pub struct Global { + pub instance: Instance, + pub surfaces: Registry<Surface>, + pub(crate) hubs: Hubs, +} + +impl Global { + pub fn new(name: &str, instance_desc: wgt::InstanceDescriptor) -> Self { + profiling::scope!("Global::new"); + Self { + instance: Instance::new(name, instance_desc), + surfaces: Registry::without_backend(), + hubs: Hubs::new(), + } + } + + /// # Safety + /// + /// Refer to the creation of wgpu-hal Instance for every backend. 
+ pub unsafe fn from_hal_instance<A: HalApi>(name: &str, hal_instance: A::Instance) -> Self { + profiling::scope!("Global::new"); + Self { + instance: A::create_instance_from_hal(name, hal_instance), + surfaces: Registry::without_backend(), + hubs: Hubs::new(), + } + } + + /// # Safety + /// + /// - The raw instance handle returned must not be manually destroyed. + pub unsafe fn instance_as_hal<A: HalApi>(&self) -> Option<&A::Instance> { + A::instance_as_hal(&self.instance) + } + + /// # Safety + /// + /// - The raw handles obtained from the Instance must not be manually destroyed + pub unsafe fn from_instance(instance: Instance) -> Self { + profiling::scope!("Global::new"); + Self { + instance, + surfaces: Registry::without_backend(), + hubs: Hubs::new(), + } + } + + pub fn clear_backend<A: HalApi>(&self, _dummy: ()) { + let hub = A::hub(self); + let surfaces_locked = self.surfaces.read(); + // this is used for tests, which keep the adapter + hub.clear(&surfaces_locked, false); + } + + pub fn generate_report(&self) -> GlobalReport { + GlobalReport { + surfaces: self.surfaces.generate_report(), + #[cfg(vulkan)] + vulkan: if self.instance.vulkan.is_some() { + Some(self.hubs.vulkan.generate_report()) + } else { + None + }, + #[cfg(metal)] + metal: if self.instance.metal.is_some() { + Some(self.hubs.metal.generate_report()) + } else { + None + }, + #[cfg(dx12)] + dx12: if self.instance.dx12.is_some() { + Some(self.hubs.dx12.generate_report()) + } else { + None + }, + #[cfg(gles)] + gl: if self.instance.gl.is_some() { + Some(self.hubs.gl.generate_report()) + } else { + None + }, + } + } +} + +impl Drop for Global { + fn drop(&mut self) { + profiling::scope!("Global::drop"); + resource_log!("Global::drop"); + let mut surfaces_locked = self.surfaces.write(); + + // destroy hubs before the instance gets dropped + #[cfg(vulkan)] + { + self.hubs.vulkan.clear(&surfaces_locked, true); + } + #[cfg(metal)] + { + self.hubs.metal.clear(&surfaces_locked, true); + } + #[cfg(dx12)] 
+ { + self.hubs.dx12.clear(&surfaces_locked, true); + } + #[cfg(gles)] + { + self.hubs.gl.clear(&surfaces_locked, true); + } + + // destroy surfaces + for element in surfaces_locked.map.drain(..) { + if let Element::Occupied(arc_surface, _) = element { + if let Some(surface) = Arc::into_inner(arc_surface) { + self.instance.destroy_surface(surface); + } else { + panic!("Surface cannot be destroyed because is still in use"); + } + } + } + } +} + +#[cfg(send_sync)] +fn _test_send_sync(global: &Global) { + fn test_internal<T: Send + Sync>(_: T) {} + test_internal(global) +} diff --git a/third_party/rust/wgpu-core/src/hal_api.rs b/third_party/rust/wgpu-core/src/hal_api.rs new file mode 100644 index 0000000000..179024baed --- /dev/null +++ b/third_party/rust/wgpu-core/src/hal_api.rs @@ -0,0 +1,116 @@ +use wgt::{Backend, WasmNotSendSync}; + +use crate::{ + global::Global, + hub::Hub, + instance::{Instance, Surface}, +}; + +pub trait HalApi: hal::Api + 'static + WasmNotSendSync { + const VARIANT: Backend; + fn create_instance_from_hal(name: &str, hal_instance: Self::Instance) -> Instance; + fn instance_as_hal(instance: &Instance) -> Option<&Self::Instance>; + fn hub(global: &Global) -> &Hub<Self>; + fn get_surface(surface: &Surface) -> Option<&Self::Surface>; +} + +impl HalApi for hal::api::Empty { + const VARIANT: Backend = Backend::Empty; + fn create_instance_from_hal(_: &str, _: Self::Instance) -> Instance { + unimplemented!("called empty api") + } + fn instance_as_hal(_: &Instance) -> Option<&Self::Instance> { + unimplemented!("called empty api") + } + fn hub(_: &Global) -> &Hub<Self> { + unimplemented!("called empty api") + } + fn get_surface(_: &Surface) -> Option<&Self::Surface> { + unimplemented!("called empty api") + } +} + +#[cfg(vulkan)] +impl HalApi for hal::api::Vulkan { + const VARIANT: Backend = Backend::Vulkan; + fn create_instance_from_hal(name: &str, hal_instance: Self::Instance) -> Instance { + Instance { + name: name.to_owned(), + vulkan: 
Some(hal_instance), + ..Default::default() + } + } + fn instance_as_hal(instance: &Instance) -> Option<&Self::Instance> { + instance.vulkan.as_ref() + } + fn hub(global: &Global) -> &Hub<Self> { + &global.hubs.vulkan + } + fn get_surface(surface: &Surface) -> Option<&Self::Surface> { + surface.raw.downcast_ref::<Self>() + } +} + +#[cfg(metal)] +impl HalApi for hal::api::Metal { + const VARIANT: Backend = Backend::Metal; + fn create_instance_from_hal(name: &str, hal_instance: Self::Instance) -> Instance { + Instance { + name: name.to_owned(), + metal: Some(hal_instance), + ..Default::default() + } + } + fn instance_as_hal(instance: &Instance) -> Option<&Self::Instance> { + instance.metal.as_ref() + } + fn hub(global: &Global) -> &Hub<Self> { + &global.hubs.metal + } + fn get_surface(surface: &Surface) -> Option<&Self::Surface> { + surface.raw.downcast_ref::<Self>() + } +} + +#[cfg(dx12)] +impl HalApi for hal::api::Dx12 { + const VARIANT: Backend = Backend::Dx12; + fn create_instance_from_hal(name: &str, hal_instance: Self::Instance) -> Instance { + Instance { + name: name.to_owned(), + dx12: Some(hal_instance), + ..Default::default() + } + } + fn instance_as_hal(instance: &Instance) -> Option<&Self::Instance> { + instance.dx12.as_ref() + } + fn hub(global: &Global) -> &Hub<Self> { + &global.hubs.dx12 + } + fn get_surface(surface: &Surface) -> Option<&Self::Surface> { + surface.raw.downcast_ref::<Self>() + } +} + +#[cfg(gles)] +impl HalApi for hal::api::Gles { + const VARIANT: Backend = Backend::Gl; + fn create_instance_from_hal(name: &str, hal_instance: Self::Instance) -> Instance { + #[allow(clippy::needless_update)] + Instance { + name: name.to_owned(), + gl: Some(hal_instance), + ..Default::default() + } + } + fn instance_as_hal(instance: &Instance) -> Option<&Self::Instance> { + instance.gl.as_ref() + } + fn hub(global: &Global) -> &Hub<Self> { + &global.hubs.gl + } + fn get_surface(surface: &Surface) -> Option<&Self::Surface> { + 
surface.raw.downcast_ref::<Self>() + } +} diff --git a/third_party/rust/wgpu-core/src/hash_utils.rs b/third_party/rust/wgpu-core/src/hash_utils.rs new file mode 100644 index 0000000000..f44aad2f1a --- /dev/null +++ b/third_party/rust/wgpu-core/src/hash_utils.rs @@ -0,0 +1,86 @@ +//! Module for hashing utilities. +//! +//! Named hash_utils to prevent clashing with the std::hash module. + +/// HashMap using a fast, non-cryptographic hash algorithm. +pub type FastHashMap<K, V> = + std::collections::HashMap<K, V, std::hash::BuildHasherDefault<rustc_hash::FxHasher>>; +/// HashSet using a fast, non-cryptographic hash algorithm. +pub type FastHashSet<K> = + std::collections::HashSet<K, std::hash::BuildHasherDefault<rustc_hash::FxHasher>>; + +/// IndexMap using a fast, non-cryptographic hash algorithm. +pub type FastIndexMap<K, V> = + indexmap::IndexMap<K, V, std::hash::BuildHasherDefault<rustc_hash::FxHasher>>; + +/// HashMap that uses pre-hashed keys and an identity hasher. +/// +/// This is useful when you only need the key to lookup the value, and don't need to store the key, +/// particularly when the key is large. +pub type PreHashedMap<K, V> = + std::collections::HashMap<PreHashedKey<K>, V, std::hash::BuildHasherDefault<IdentityHasher>>; + +/// A pre-hashed key using FxHash which allows the hashing operation to be disconnected +/// from the storage in the map. 
+pub struct PreHashedKey<K>(u64, std::marker::PhantomData<fn() -> K>); + +impl<K> std::fmt::Debug for PreHashedKey<K> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_tuple("PreHashedKey").field(&self.0).finish() + } +} + +impl<K> Copy for PreHashedKey<K> {} + +impl<K> Clone for PreHashedKey<K> { + fn clone(&self) -> Self { + *self + } +} + +impl<K> PartialEq for PreHashedKey<K> { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } +} + +impl<K> Eq for PreHashedKey<K> {} + +impl<K> std::hash::Hash for PreHashedKey<K> { + fn hash<H: std::hash::Hasher>(&self, state: &mut H) { + self.0.hash(state); + } +} + +impl<K: std::hash::Hash> PreHashedKey<K> { + pub fn from_key(key: &K) -> Self { + use std::hash::Hasher; + + let mut hasher = rustc_hash::FxHasher::default(); + key.hash(&mut hasher); + Self(hasher.finish(), std::marker::PhantomData) + } +} + +/// A hasher which does nothing. Useful for when you want to use a map with pre-hashed keys. +/// +/// When hashing with this hasher, you must provide exactly 8 bytes. Multiple calls to `write` +/// will overwrite the previous value. +#[derive(Default)] +pub struct IdentityHasher { + hash: u64, +} + +impl std::hash::Hasher for IdentityHasher { + fn write(&mut self, bytes: &[u8]) { + self.hash = u64::from_ne_bytes( + bytes + .try_into() + .expect("identity hasher must be given exactly 8 bytes"), + ); + } + + fn finish(&self) -> u64 { + self.hash + } +} diff --git a/third_party/rust/wgpu-core/src/hub.rs b/third_party/rust/wgpu-core/src/hub.rs new file mode 100644 index 0000000000..0f4589c8b3 --- /dev/null +++ b/third_party/rust/wgpu-core/src/hub.rs @@ -0,0 +1,320 @@ +/*! Allocating resource ids, and tracking the resources they refer to. + +The `wgpu_core` API uses identifiers of type [`Id<R>`] to refer to +resources of type `R`. For example, [`id::DeviceId`] is an alias for +`Id<markers::Device>`, and [`id::BufferId`] is an alias for +`Id<markers::Buffer>`. 
`Id` implements `Copy`, `Hash`, `Eq`, `Ord`, and +of course `Debug`. + +[`id::DeviceId`]: crate::id::DeviceId +[`id::BufferId`]: crate::id::BufferId + +Each `Id` contains not only an index for the resource it denotes but +also a Backend indicating which `wgpu` backend it belongs to. You +can use the [`gfx_select`] macro to dynamically dispatch on an id's +backend to a function specialized at compile time for a specific +backend. See that macro's documentation for details. + +`Id`s also incorporate a generation number, for additional validation. + +The resources to which identifiers refer are freed explicitly. +Attempting to use an identifier for a resource that has been freed +elicits an error result. + +## Assigning ids to resources + +The users of `wgpu_core` generally want resource ids to be assigned +in one of two ways: + +- Users like `wgpu` want `wgpu_core` to assign ids to resources itself. + For example, `wgpu` expects to call `Global::device_create_buffer` + and have the return value indicate the newly created buffer's id. + +- Users like `player` and Firefox want to allocate ids themselves, and + pass `Global::device_create_buffer` and friends the id to assign the + new resource. + +To accommodate either pattern, `wgpu_core` methods that create +resources all expect an `id_in` argument that the caller can use to +specify the id, and they all return the id used. For example, the +declaration of `Global::device_create_buffer` looks like this: + +```ignore +impl Global { + /* ... */ + pub fn device_create_buffer<A: HalApi>( + &self, + device_id: id::DeviceId, + desc: &resource::BufferDescriptor, + id_in: Input<G>, + ) -> (id::BufferId, Option<resource::CreateBufferError>) { + /* ... */ + } + /* ... */ +} +``` + +Users that want to assign resource ids themselves pass in the id they +want as the `id_in` argument, whereas users that want `wgpu_core` +itself to choose ids always pass `()`. 
In either case, the id +ultimately assigned is returned as the first element of the tuple. + +Producing true identifiers from `id_in` values is the job of an +[`crate::identity::IdentityManager`] or ids will be received from outside through `Option<Id>` arguments. + +## Id allocation and streaming + +Perhaps surprisingly, allowing users to assign resource ids themselves +enables major performance improvements in some applications. + +The `wgpu_core` API is designed for use by Firefox's [WebGPU] +implementation. For security, web content and GPU use must be kept +segregated in separate processes, with all interaction between them +mediated by an inter-process communication protocol. As web content uses +the WebGPU API, the content process sends messages to the GPU process, +which interacts with the platform's GPU APIs on content's behalf, +occasionally sending results back. + +In a classic Rust API, a resource allocation function takes parameters +describing the resource to create, and if creation succeeds, it returns +the resource id in a `Result::Ok` value. However, this design is a poor +fit for the split-process design described above: content must wait for +the reply to its buffer-creation message (say) before it can know which +id it can use in the next message that uses that buffer. On a common +usage pattern, the classic Rust design imposes the latency of a full +cross-process round trip. + +We can avoid incurring these round-trip latencies simply by letting the +content process assign resource ids itself. With this approach, content +can choose an id for the new buffer, send a message to create the +buffer, and then immediately send the next message operating on that +buffer, since it already knows its id. Allowing content and GPU process +activity to be pipelined greatly improves throughput. 
+ +To help propagate errors correctly in this style of usage, when resource +creation fails, the id supplied for that resource is marked to indicate +as much, allowing subsequent operations using that id to be properly +flagged as errors as well. + +[`gfx_select`]: crate::gfx_select +[`process`]: crate::identity::IdentityManager::process +[`Id<R>`]: crate::id::Id +[wrapped in a mutex]: trait.IdentityHandler.html#impl-IdentityHandler%3CI%3E-for-Mutex%3CIdentityManager%3E +[WebGPU]: https://www.w3.org/TR/webgpu/ + +*/ + +use crate::{ + binding_model::{BindGroup, BindGroupLayout, PipelineLayout}, + command::{CommandBuffer, RenderBundle}, + device::{queue::Queue, Device}, + hal_api::HalApi, + instance::{Adapter, Surface}, + pipeline::{ComputePipeline, RenderPipeline, ShaderModule}, + registry::{Registry, RegistryReport}, + resource::{Buffer, QuerySet, Sampler, StagingBuffer, Texture, TextureView}, + storage::{Element, Storage}, +}; +use std::fmt::Debug; + +#[derive(Debug, PartialEq, Eq)] +pub struct HubReport { + pub adapters: RegistryReport, + pub devices: RegistryReport, + pub queues: RegistryReport, + pub pipeline_layouts: RegistryReport, + pub shader_modules: RegistryReport, + pub bind_group_layouts: RegistryReport, + pub bind_groups: RegistryReport, + pub command_buffers: RegistryReport, + pub render_bundles: RegistryReport, + pub render_pipelines: RegistryReport, + pub compute_pipelines: RegistryReport, + pub query_sets: RegistryReport, + pub buffers: RegistryReport, + pub textures: RegistryReport, + pub texture_views: RegistryReport, + pub samplers: RegistryReport, +} + +impl HubReport { + pub fn is_empty(&self) -> bool { + self.adapters.is_empty() + } +} + +#[allow(rustdoc::private_intra_doc_links)] +/// All the resources for a particular backend in a [`crate::global::Global`]. +/// +/// To obtain `global`'s `Hub` for some [`HalApi`] backend type `A`, +/// call [`A::hub(global)`]. 
+/// +/// ## Locking +/// +/// Each field in `Hub` is a [`Registry`] holding all the values of a +/// particular type of resource, all protected by a single RwLock. +/// So for example, to access any [`Buffer`], you must acquire a read +/// lock on the `Hub`s entire buffers registry. The lock guard +/// gives you access to the `Registry`'s [`Storage`], which you can +/// then index with the buffer's id. (Yes, this design causes +/// contention; see [#2272].) +/// +/// But most `wgpu` operations require access to several different +/// kinds of resource, so you often need to hold locks on several +/// different fields of your [`Hub`] simultaneously. +/// +/// Inside the `Registry` there are `Arc<T>` where `T` is a Resource +/// Lock of `Registry` happens only when accessing to get the specific resource +/// +/// +/// [`A::hub(global)`]: HalApi::hub +pub struct Hub<A: HalApi> { + pub adapters: Registry<Adapter<A>>, + pub devices: Registry<Device<A>>, + pub queues: Registry<Queue<A>>, + pub pipeline_layouts: Registry<PipelineLayout<A>>, + pub shader_modules: Registry<ShaderModule<A>>, + pub bind_group_layouts: Registry<BindGroupLayout<A>>, + pub bind_groups: Registry<BindGroup<A>>, + pub command_buffers: Registry<CommandBuffer<A>>, + pub render_bundles: Registry<RenderBundle<A>>, + pub render_pipelines: Registry<RenderPipeline<A>>, + pub compute_pipelines: Registry<ComputePipeline<A>>, + pub query_sets: Registry<QuerySet<A>>, + pub buffers: Registry<Buffer<A>>, + pub staging_buffers: Registry<StagingBuffer<A>>, + pub textures: Registry<Texture<A>>, + pub texture_views: Registry<TextureView<A>>, + pub samplers: Registry<Sampler<A>>, +} + +impl<A: HalApi> Hub<A> { + fn new() -> Self { + Self { + adapters: Registry::new(A::VARIANT), + devices: Registry::new(A::VARIANT), + queues: Registry::new(A::VARIANT), + pipeline_layouts: Registry::new(A::VARIANT), + shader_modules: Registry::new(A::VARIANT), + bind_group_layouts: Registry::new(A::VARIANT), + bind_groups: 
Registry::new(A::VARIANT), + command_buffers: Registry::new(A::VARIANT), + render_bundles: Registry::new(A::VARIANT), + render_pipelines: Registry::new(A::VARIANT), + compute_pipelines: Registry::new(A::VARIANT), + query_sets: Registry::new(A::VARIANT), + buffers: Registry::new(A::VARIANT), + staging_buffers: Registry::new(A::VARIANT), + textures: Registry::new(A::VARIANT), + texture_views: Registry::new(A::VARIANT), + samplers: Registry::new(A::VARIANT), + } + } + + //TODO: instead of having a hacky `with_adapters` parameter, + // we should have `clear_device(device_id)` that specifically destroys + // everything related to a logical device. + pub(crate) fn clear(&self, surface_guard: &Storage<Surface>, with_adapters: bool) { + use hal::Surface; + + let mut devices = self.devices.write(); + for element in devices.map.iter() { + if let Element::Occupied(ref device, _) = *element { + device.prepare_to_die(); + } + } + + self.command_buffers.write().map.clear(); + self.samplers.write().map.clear(); + self.texture_views.write().map.clear(); + self.textures.write().map.clear(); + self.buffers.write().map.clear(); + self.bind_groups.write().map.clear(); + self.shader_modules.write().map.clear(); + self.bind_group_layouts.write().map.clear(); + self.pipeline_layouts.write().map.clear(); + self.compute_pipelines.write().map.clear(); + self.render_pipelines.write().map.clear(); + self.query_sets.write().map.clear(); + + for element in surface_guard.map.iter() { + if let Element::Occupied(ref surface, _epoch) = *element { + if let Some(ref mut present) = surface.presentation.lock().take() { + if let Some(device) = present.device.downcast_ref::<A>() { + let suf = A::get_surface(surface); + unsafe { + suf.unwrap().unconfigure(device.raw()); + //TODO: we could destroy the surface here + } + } + } + } + } + + self.queues.write().map.clear(); + devices.map.clear(); + + if with_adapters { + drop(devices); + self.adapters.write().map.clear(); + } + } + + pub(crate) fn 
surface_unconfigure(&self, device: &Device<A>, surface: &A::Surface) { + unsafe { + use hal::Surface; + surface.unconfigure(device.raw()); + } + } + + pub fn generate_report(&self) -> HubReport { + HubReport { + adapters: self.adapters.generate_report(), + devices: self.devices.generate_report(), + queues: self.queues.generate_report(), + pipeline_layouts: self.pipeline_layouts.generate_report(), + shader_modules: self.shader_modules.generate_report(), + bind_group_layouts: self.bind_group_layouts.generate_report(), + bind_groups: self.bind_groups.generate_report(), + command_buffers: self.command_buffers.generate_report(), + render_bundles: self.render_bundles.generate_report(), + render_pipelines: self.render_pipelines.generate_report(), + compute_pipelines: self.compute_pipelines.generate_report(), + query_sets: self.query_sets.generate_report(), + buffers: self.buffers.generate_report(), + textures: self.textures.generate_report(), + texture_views: self.texture_views.generate_report(), + samplers: self.samplers.generate_report(), + } + } +} + +pub struct Hubs { + #[cfg(vulkan)] + pub(crate) vulkan: Hub<hal::api::Vulkan>, + #[cfg(metal)] + pub(crate) metal: Hub<hal::api::Metal>, + #[cfg(dx12)] + pub(crate) dx12: Hub<hal::api::Dx12>, + #[cfg(gles)] + pub(crate) gl: Hub<hal::api::Gles>, + #[cfg(all(not(vulkan), not(metal), not(dx12), not(gles)))] + pub(crate) empty: Hub<hal::api::Empty>, +} + +impl Hubs { + pub(crate) fn new() -> Self { + Self { + #[cfg(vulkan)] + vulkan: Hub::new(), + #[cfg(metal)] + metal: Hub::new(), + #[cfg(dx12)] + dx12: Hub::new(), + #[cfg(gles)] + gl: Hub::new(), + #[cfg(all(not(vulkan), not(metal), not(dx12), not(gles)))] + empty: Hub::new(), + } + } +} diff --git a/third_party/rust/wgpu-core/src/id.rs b/third_party/rust/wgpu-core/src/id.rs new file mode 100644 index 0000000000..1dbb491e60 --- /dev/null +++ b/third_party/rust/wgpu-core/src/id.rs @@ -0,0 +1,391 @@ +use crate::{Epoch, Index}; +use std::{ + cmp::Ordering, + fmt::{self, 
Debug}, + hash::Hash, + marker::PhantomData, +}; +use wgt::{Backend, WasmNotSendSync}; + +type IdType = u64; +type ZippedIndex = Index; +type NonZeroId = std::num::NonZeroU64; + +const INDEX_BITS: usize = std::mem::size_of::<ZippedIndex>() * 8; +const EPOCH_BITS: usize = INDEX_BITS - BACKEND_BITS; +const BACKEND_BITS: usize = 3; +const BACKEND_SHIFT: usize = INDEX_BITS * 2 - BACKEND_BITS; +pub const EPOCH_MASK: u32 = (1 << (EPOCH_BITS)) - 1; + +/// The raw underlying representation of an identifier. +#[repr(transparent)] +#[cfg_attr( + any(feature = "serde", feature = "trace"), + derive(serde::Serialize), + serde(into = "SerialId") +)] +#[cfg_attr( + any(feature = "serde", feature = "replay"), + derive(serde::Deserialize), + serde(from = "SerialId") +)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct RawId(NonZeroId); + +impl RawId { + #[doc(hidden)] + #[inline] + pub fn from_non_zero(non_zero: NonZeroId) -> Self { + Self(non_zero) + } + + #[doc(hidden)] + #[inline] + pub fn into_non_zero(self) -> NonZeroId { + self.0 + } + + /// Zip together an identifier and return its raw underlying representation. + pub fn zip(index: Index, epoch: Epoch, backend: Backend) -> RawId { + assert_eq!(0, epoch >> EPOCH_BITS); + assert_eq!(0, (index as IdType) >> INDEX_BITS); + let v = index as IdType + | ((epoch as IdType) << INDEX_BITS) + | ((backend as IdType) << BACKEND_SHIFT); + Self(NonZeroId::new(v).unwrap()) + } + + /// Unzip a raw identifier into its components. 
+ #[allow(trivial_numeric_casts)] + pub fn unzip(self) -> (Index, Epoch, Backend) { + ( + (self.0.get() as ZippedIndex) as Index, + (((self.0.get() >> INDEX_BITS) as ZippedIndex) & (EPOCH_MASK as ZippedIndex)) as Index, + self.backend(), + ) + } + + pub fn backend(self) -> Backend { + match self.0.get() >> (BACKEND_SHIFT) as u8 { + 0 => Backend::Empty, + 1 => Backend::Vulkan, + 2 => Backend::Metal, + 3 => Backend::Dx12, + 4 => Backend::Gl, + _ => unreachable!(), + } + } +} + +/// Coerce a slice of identifiers into a slice of optional raw identifiers. +/// +/// There's two reasons why we know this is correct: +/// * `Option<T>` is guaranteed to be niche-filled to 0's. +/// * The `T` in `Option<T>` can inhabit any representation except 0's, since +/// its underlying representation is `NonZero*`. +pub fn as_option_slice<T: Marker>(ids: &[Id<T>]) -> &[Option<Id<T>>] { + // SAFETY: Any Id<T> is repr(transparent) over `Option<RawId>`, since both + // are backed by non-zero types. + unsafe { std::slice::from_raw_parts(ids.as_ptr().cast(), ids.len()) } +} + +/// An identifier for a wgpu object. +/// +/// An `Id<T>` value identifies a value stored in a [`Global`]'s [`Hub`]'s [`Storage`]. +/// `Storage` implements [`Index`] and [`IndexMut`], accepting `Id` values as indices. +/// +/// ## Note on `Id` typing +/// +/// You might assume that an `Id<T>` can only be used to retrieve a resource of +/// type `T`, but that is not quite the case. The id types in `wgpu-core`'s +/// public API ([`TextureId`], for example) can refer to resources belonging to +/// any backend, but the corresponding resource types ([`Texture<A>`], for +/// example) are always parameterized by a specific backend `A`. +/// +/// So the `T` in `Id<T>` is usually a resource type like `Texture<Empty>`, +/// where [`Empty`] is the `wgpu_hal` dummy back end. These empty types are +/// never actually used, beyond just making sure you access each `Storage` with +/// the right kind of identifier. 
The members of [`Hub<A>`] pair up each +/// `X<Empty>` type with the resource type `X<A>`, for some specific backend +/// `A`. +/// +/// [`Global`]: crate::global::Global +/// [`Hub`]: crate::hub::Hub +/// [`Hub<A>`]: crate::hub::Hub +/// [`Storage`]: crate::storage::Storage +/// [`Texture<A>`]: crate::resource::Texture +/// [`Index`]: std::ops::Index +/// [`IndexMut`]: std::ops::IndexMut +/// [`Registry`]: crate::hub::Registry +/// [`Empty`]: hal::api::Empty +#[repr(transparent)] +#[cfg_attr(any(feature = "serde", feature = "trace"), derive(serde::Serialize))] +#[cfg_attr(any(feature = "serde", feature = "replay"), derive(serde::Deserialize))] +#[cfg_attr( + any(feature = "serde", feature = "trace", feature = "replay"), + serde(transparent) +)] +pub struct Id<T: Marker>(RawId, PhantomData<T>); + +// This type represents Id in a more readable (and editable) way. +#[allow(dead_code)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +enum SerialId { + // The only variant forces RON to not ignore "Id" + Id(Index, Epoch, Backend), +} + +impl From<RawId> for SerialId { + fn from(id: RawId) -> Self { + let (index, epoch, backend) = id.unzip(); + Self::Id(index, epoch, backend) + } +} + +impl From<SerialId> for RawId { + fn from(id: SerialId) -> Self { + match id { + SerialId::Id(index, epoch, backend) => RawId::zip(index, epoch, backend), + } + } +} + +impl<T> Id<T> +where + T: Marker, +{ + /// # Safety + /// + /// The raw id must be valid for the type. + pub unsafe fn from_raw(raw: RawId) -> Self { + Self(raw, PhantomData) + } + + /// Coerce the identifiers into its raw underlying representation. + pub fn into_raw(self) -> RawId { + self.0 + } + + #[allow(dead_code)] + pub(crate) fn dummy(index: u32) -> Self { + Id::zip(index, 1, Backend::Empty) + } + + #[allow(dead_code)] + pub(crate) fn is_valid(&self) -> bool { + self.backend() != Backend::Empty + } + + /// Get the backend this identifier corresponds to. 
+ #[inline] + pub fn backend(self) -> Backend { + self.0.backend() + } + + /// Transmute this identifier to one with a different marker trait. + /// + /// Legal use is governed through a sealed trait, however it's correctness + /// depends on the current implementation of `wgpu-core`. + #[inline] + pub const fn transmute<U: self::transmute::Transmute<T>>(self) -> Id<U> { + Id(self.0, PhantomData) + } + + #[inline] + pub fn zip(index: Index, epoch: Epoch, backend: Backend) -> Self { + Id(RawId::zip(index, epoch, backend), PhantomData) + } + + #[inline] + pub fn unzip(self) -> (Index, Epoch, Backend) { + self.0.unzip() + } +} + +pub(crate) mod transmute { + // This trait is effectively sealed to prevent illegal transmutes. + pub trait Transmute<U>: super::Marker {} + + // Self-transmute is always legal. + impl<T> Transmute<T> for T where T: super::Marker {} + + // TODO: Remove these once queues have their own identifiers. + impl Transmute<super::markers::Queue> for super::markers::Device {} + impl Transmute<super::markers::Device> for super::markers::Queue {} + impl Transmute<super::markers::CommandBuffer> for super::markers::CommandEncoder {} + impl Transmute<super::markers::CommandEncoder> for super::markers::CommandBuffer {} +} + +impl<T> Copy for Id<T> where T: Marker {} + +impl<T> Clone for Id<T> +where + T: Marker, +{ + #[inline] + fn clone(&self) -> Self { + *self + } +} + +impl<T> Debug for Id<T> +where + T: Marker, +{ + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let (index, epoch, backend) = self.unzip(); + let backend = match backend { + Backend::Empty => "_", + Backend::Vulkan => "vk", + Backend::Metal => "mtl", + Backend::Dx12 => "d3d12", + Backend::Gl => "gl", + Backend::BrowserWebGpu => "webgpu", + }; + write!(formatter, "Id({index},{epoch},{backend})")?; + Ok(()) + } +} + +impl<T> Hash for Id<T> +where + T: Marker, +{ + #[inline] + fn hash<H: std::hash::Hasher>(&self, state: &mut H) { + self.0.hash(state); + } +} + +impl<T> 
PartialEq for Id<T> +where + T: Marker, +{ + #[inline] + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } +} + +impl<T> Eq for Id<T> where T: Marker {} + +impl<T> PartialOrd for Id<T> +where + T: Marker, +{ + #[inline] + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + self.0.partial_cmp(&other.0) + } +} + +impl<T> Ord for Id<T> +where + T: Marker, +{ + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + self.0.cmp(&other.0) + } +} + +/// Marker trait used to determine which types uniquely identify a resource. +/// +/// For example, `Device<A>` will have the same type of identifier as +/// `Device<B>` because `Device<T>` for any `T` defines the same maker type. +pub trait Marker: 'static + WasmNotSendSync {} + +// This allows `()` to be used as a marker type for tests. +// +// We don't want these in production code, since they essentially remove type +// safety, like how identifiers across different types can be compared. +#[cfg(test)] +impl Marker for () {} + +/// Define identifiers for each resource. +macro_rules! ids { + ($( + $(#[$($meta:meta)*])* + pub type $name:ident $marker:ident; + )*) => { + /// Marker types for each resource. + pub mod markers { + $( + #[derive(Debug)] + pub enum $marker {} + impl super::Marker for $marker {} + )* + } + + $( + $(#[$($meta)*])* + pub type $name = Id<self::markers::$marker>; + )* + } +} + +ids! 
{ + pub type AdapterId Adapter; + pub type SurfaceId Surface; + pub type DeviceId Device; + pub type QueueId Queue; + pub type BufferId Buffer; + pub type StagingBufferId StagingBuffer; + pub type TextureViewId TextureView; + pub type TextureId Texture; + pub type SamplerId Sampler; + pub type BindGroupLayoutId BindGroupLayout; + pub type PipelineLayoutId PipelineLayout; + pub type BindGroupId BindGroup; + pub type ShaderModuleId ShaderModule; + pub type RenderPipelineId RenderPipeline; + pub type ComputePipelineId ComputePipeline; + pub type CommandEncoderId CommandEncoder; + pub type CommandBufferId CommandBuffer; + pub type RenderPassEncoderId RenderPassEncoder; + pub type ComputePassEncoderId ComputePassEncoder; + pub type RenderBundleEncoderId RenderBundleEncoder; + pub type RenderBundleId RenderBundle; + pub type QuerySetId QuerySet; +} + +#[test] +fn test_id_backend() { + for &b in &[ + Backend::Empty, + Backend::Vulkan, + Backend::Metal, + Backend::Dx12, + Backend::Gl, + ] { + let id = crate::id::Id::<()>::zip(1, 0, b); + let (_id, _epoch, backend) = id.unzip(); + assert_eq!(id.backend(), b); + assert_eq!(backend, b); + } +} + +#[test] +fn test_id() { + let last_index = ((1u64 << INDEX_BITS) - 1) as Index; + let indexes = [1, last_index / 2 - 1, last_index / 2 + 1, last_index]; + let epochs = [1, EPOCH_MASK / 2 - 1, EPOCH_MASK / 2 + 1, EPOCH_MASK]; + let backends = [ + Backend::Empty, + Backend::Vulkan, + Backend::Metal, + Backend::Dx12, + Backend::Gl, + ]; + for &i in &indexes { + for &e in &epochs { + for &b in &backends { + let id = crate::id::Id::<()>::zip(i, e, b); + let (index, epoch, backend) = id.unzip(); + assert_eq!(index, i); + assert_eq!(epoch, e); + assert_eq!(backend, b); + } + } + } +} diff --git a/third_party/rust/wgpu-core/src/identity.rs b/third_party/rust/wgpu-core/src/identity.rs new file mode 100644 index 0000000000..0e34055c74 --- /dev/null +++ b/third_party/rust/wgpu-core/src/identity.rs @@ -0,0 +1,129 @@ +use parking_lot::Mutex; +use 
wgt::Backend; + +use crate::{ + id::{Id, Marker}, + Epoch, FastHashMap, Index, +}; +use std::{fmt::Debug, marker::PhantomData}; + +/// A simple structure to allocate [`Id`] identifiers. +/// +/// Calling [`alloc`] returns a fresh, never-before-seen id. Calling [`free`] +/// marks an id as dead; it will never be returned again by `alloc`. +/// +/// Use `IdentityManager::default` to construct new instances. +/// +/// `IdentityManager` returns `Id`s whose index values are suitable for use as +/// indices into a `Storage<T>` that holds those ids' referents: +/// +/// - Every live id has a distinct index value. Each live id's index selects a +/// distinct element in the vector. +/// +/// - `IdentityManager` prefers low index numbers. If you size your vector to +/// accommodate the indices produced here, the vector's length will reflect +/// the highwater mark of actual occupancy. +/// +/// - `IdentityManager` reuses the index values of freed ids before returning +/// ids with new index values. Freed vector entries get reused. +/// +/// See the module-level documentation for an overview of how this +/// fits together. +/// +/// [`Id`]: crate::id::Id +/// [`Backend`]: wgt::Backend +/// [`alloc`]: IdentityManager::alloc +/// [`free`]: IdentityManager::free +#[derive(Debug, Default)] +pub(super) struct IdentityValues { + free: Vec<(Index, Epoch)>, + //sorted by Index + used: FastHashMap<Epoch, Vec<Index>>, + count: usize, +} + +impl IdentityValues { + /// Allocate a fresh, never-before-seen id with the given `backend`. + /// + /// The backend is incorporated into the id, so that ids allocated with + /// different `backend` values are always distinct. 
+ pub fn alloc<T: Marker>(&mut self, backend: Backend) -> Id<T> { + self.count += 1; + match self.free.pop() { + Some((index, epoch)) => Id::zip(index, epoch + 1, backend), + None => { + let epoch = 1; + let used = self.used.entry(epoch).or_insert_with(Default::default); + let index = if let Some(i) = used.iter().max_by_key(|v| *v) { + i + 1 + } else { + 0 + }; + used.push(index); + Id::zip(index, epoch, backend) + } + } + } + + pub fn mark_as_used<T: Marker>(&mut self, id: Id<T>) -> Id<T> { + self.count += 1; + let (index, epoch, _backend) = id.unzip(); + let used = self.used.entry(epoch).or_insert_with(Default::default); + used.push(index); + id + } + + /// Free `id`. It will never be returned from `alloc` again. + pub fn release<T: Marker>(&mut self, id: Id<T>) { + let (index, epoch, _backend) = id.unzip(); + self.free.push((index, epoch)); + self.count -= 1; + } + + pub fn count(&self) -> usize { + self.count + } +} + +#[derive(Debug)] +pub struct IdentityManager<T: Marker> { + pub(super) values: Mutex<IdentityValues>, + _phantom: PhantomData<T>, +} + +impl<T: Marker> IdentityManager<T> { + pub fn process(&self, backend: Backend) -> Id<T> { + self.values.lock().alloc(backend) + } + pub fn mark_as_used(&self, id: Id<T>) -> Id<T> { + self.values.lock().mark_as_used(id) + } + pub fn free(&self, id: Id<T>) { + self.values.lock().release(id) + } +} + +impl<T: Marker> IdentityManager<T> { + pub fn new() -> Self { + Self { + values: Mutex::new(IdentityValues::default()), + _phantom: PhantomData, + } + } +} + +#[test] +fn test_epoch_end_of_life() { + use crate::id; + + let man = IdentityManager::<id::markers::Buffer>::new(); + let forced_id = man.mark_as_used(id::BufferId::zip(0, 1, Backend::Empty)); + assert_eq!(forced_id.unzip().0, 0); + let id1 = man.process(Backend::Empty); + assert_eq!(id1.unzip().0, 1); + man.free(id1); + let id2 = man.process(Backend::Empty); + // confirm that the epoch 1 is no longer re-used + assert_eq!(id2.unzip().0, 1); + 
assert_eq!(id2.unzip().1, 2); +} diff --git a/third_party/rust/wgpu-core/src/init_tracker/buffer.rs b/third_party/rust/wgpu-core/src/init_tracker/buffer.rs new file mode 100644 index 0000000000..2c0fa8d372 --- /dev/null +++ b/third_party/rust/wgpu-core/src/init_tracker/buffer.rs @@ -0,0 +1,39 @@ +use super::{InitTracker, MemoryInitKind}; +use crate::{hal_api::HalApi, resource::Buffer}; +use std::{ops::Range, sync::Arc}; + +#[derive(Debug, Clone)] +pub(crate) struct BufferInitTrackerAction<A: HalApi> { + pub buffer: Arc<Buffer<A>>, + pub range: Range<wgt::BufferAddress>, + pub kind: MemoryInitKind, +} + +pub(crate) type BufferInitTracker = InitTracker<wgt::BufferAddress>; + +impl BufferInitTracker { + /// Checks if an action has/requires any effect on the initialization status + /// and shrinks its range if possible. + pub(crate) fn check_action<A: HalApi>( + &self, + action: &BufferInitTrackerAction<A>, + ) -> Option<BufferInitTrackerAction<A>> { + self.create_action(&action.buffer, action.range.clone(), action.kind) + } + + /// Creates an action if it would have any effect on the initialization + /// status and shrinks the range if possible. + pub(crate) fn create_action<A: HalApi>( + &self, + buffer: &Arc<Buffer<A>>, + query_range: Range<wgt::BufferAddress>, + kind: MemoryInitKind, + ) -> Option<BufferInitTrackerAction<A>> { + self.check(query_range) + .map(|range| BufferInitTrackerAction { + buffer: buffer.clone(), + range, + kind, + }) + } +} diff --git a/third_party/rust/wgpu-core/src/init_tracker/mod.rs b/third_party/rust/wgpu-core/src/init_tracker/mod.rs new file mode 100644 index 0000000000..ccaac1e16f --- /dev/null +++ b/third_party/rust/wgpu-core/src/init_tracker/mod.rs @@ -0,0 +1,384 @@ +/*! Lazy initialization of texture and buffer memory. + +The WebGPU specification requires all texture & buffer memory to be +zero initialized on first read. To avoid unnecessary inits, we track +the initialization status of every resource and perform inits lazily. 
+ +The granularity is different for buffers and textures: + +- Buffer: Byte granularity to support use cases with large, partially + bound buffers well. + +- Texture: Mip-level per layer. That is, a 2D surface is either + completely initialized or not, subrects are not tracked. + +Every use of a buffer/texture generates an InitTrackerAction which is +recorded and later resolved at queue submit by merging them with the +current state and each other in execution order. + +It is important to note that from the point of view of the memory init +system there are two kinds of writes: + +- **Full writes**: Any kind of memcpy operation. These cause a + `MemoryInitKind.ImplicitlyInitialized` action. + +- **(Potentially) partial writes**: For example, write usage in a + shader. The system is not able to determine if a resource is fully + initialized afterwards but is no longer allowed to perform any + clears, therefore this leads to a + `MemoryInitKind.NeedsInitializedMemory` action, exactly like a read + would. + + */ + +use smallvec::SmallVec; +use std::{fmt, iter, ops::Range}; + +mod buffer; +mod texture; + +pub(crate) use buffer::{BufferInitTracker, BufferInitTrackerAction}; +pub(crate) use texture::{ + has_copy_partial_init_tracker_coverage, TextureInitRange, TextureInitTracker, + TextureInitTrackerAction, +}; + +#[derive(Debug, Clone, Copy)] +pub(crate) enum MemoryInitKind { + // The memory range is going to be written by an already initialized source, + // thus doesn't need extra attention other than marking as initialized. + ImplicitlyInitialized, + // The memory range is going to be read, therefore needs to ensure prior + // initialization. + NeedsInitializedMemory, +} + +// Most of the time a resource is either fully uninitialized (one element) or +// initialized (zero elements). 
+type UninitializedRangeVec<Idx> = SmallVec<[Range<Idx>; 1]>; + +/// Tracks initialization status of a linear range from 0..size +#[derive(Debug, Clone)] +pub(crate) struct InitTracker<Idx: Ord + Copy + Default> { + /// Non-overlapping list of all uninitialized ranges, sorted by + /// range end. + uninitialized_ranges: UninitializedRangeVec<Idx>, +} + +pub(crate) struct InitTrackerDrain<'a, Idx: fmt::Debug + Ord + Copy> { + uninitialized_ranges: &'a mut UninitializedRangeVec<Idx>, + drain_range: Range<Idx>, + first_index: usize, + next_index: usize, +} + +impl<'a, Idx> Iterator for InitTrackerDrain<'a, Idx> +where + Idx: fmt::Debug + Ord + Copy, +{ + type Item = Range<Idx>; + + fn next(&mut self) -> Option<Self::Item> { + if let Some(r) = self + .uninitialized_ranges + .get(self.next_index) + .and_then(|range| { + if range.start < self.drain_range.end { + Some(range.clone()) + } else { + None + } + }) + { + self.next_index += 1; + Some(r.start.max(self.drain_range.start)..r.end.min(self.drain_range.end)) + } else { + let num_affected = self.next_index - self.first_index; + if num_affected == 0 { + return None; + } + let first_range = &mut self.uninitialized_ranges[self.first_index]; + + // Split one "big" uninitialized range? + if num_affected == 1 + && first_range.start < self.drain_range.start + && first_range.end > self.drain_range.end + { + let old_start = first_range.start; + first_range.start = self.drain_range.end; + self.uninitialized_ranges + .insert(self.first_index, old_start..self.drain_range.start); + } + // Adjust border ranges and delete everything in-between. 
+ else { + let remove_start = if first_range.start >= self.drain_range.start { + self.first_index + } else { + first_range.end = self.drain_range.start; + self.first_index + 1 + }; + + let last_range = &mut self.uninitialized_ranges[self.next_index - 1]; + let remove_end = if last_range.end <= self.drain_range.end { + self.next_index + } else { + last_range.start = self.drain_range.end; + self.next_index - 1 + }; + + self.uninitialized_ranges.drain(remove_start..remove_end); + } + + None + } + } +} + +impl<'a, Idx> Drop for InitTrackerDrain<'a, Idx> +where + Idx: fmt::Debug + Ord + Copy, +{ + fn drop(&mut self) { + if self.next_index <= self.first_index { + for _ in self {} + } + } +} + +impl<Idx> InitTracker<Idx> +where + Idx: fmt::Debug + Ord + Copy + Default, +{ + pub(crate) fn new(size: Idx) -> Self { + Self { + uninitialized_ranges: iter::once(Idx::default()..size).collect(), + } + } + + /// Checks for uninitialized ranges within a given query range. + /// + /// If `query_range` includes any uninitialized portions of this init + /// tracker's resource, return the smallest subrange of `query_range` that + /// covers all uninitialized regions. + /// + /// The returned range may be larger than necessary, to keep this function + /// O(log n). + pub(crate) fn check(&self, query_range: Range<Idx>) -> Option<Range<Idx>> { + let index = self + .uninitialized_ranges + .partition_point(|r| r.end <= query_range.start); + self.uninitialized_ranges + .get(index) + .and_then(|start_range| { + if start_range.start < query_range.end { + let start = start_range.start.max(query_range.start); + match self.uninitialized_ranges.get(index + 1) { + Some(next_range) => { + if next_range.start < query_range.end { + // Would need to keep iterating for more + // accurate upper bound. Don't do that here. 
+ Some(start..query_range.end) + } else { + Some(start..start_range.end.min(query_range.end)) + } + } + None => Some(start..start_range.end.min(query_range.end)), + } + } else { + None + } + }) + } + + // Drains uninitialized ranges in a query range. + pub(crate) fn drain(&mut self, drain_range: Range<Idx>) -> InitTrackerDrain<Idx> { + let index = self + .uninitialized_ranges + .partition_point(|r| r.end <= drain_range.start); + InitTrackerDrain { + drain_range, + uninitialized_ranges: &mut self.uninitialized_ranges, + first_index: index, + next_index: index, + } + } +} + +impl InitTracker<u32> { + // Makes a single entry uninitialized if not already uninitialized + #[allow(dead_code)] + pub(crate) fn discard(&mut self, pos: u32) { + // first range where end>=idx + let r_idx = self.uninitialized_ranges.partition_point(|r| r.end < pos); + if let Some(r) = self.uninitialized_ranges.get(r_idx) { + // Extend range at end + if r.end == pos { + // merge with next? + if let Some(right) = self.uninitialized_ranges.get(r_idx + 1) { + if right.start == pos + 1 { + self.uninitialized_ranges[r_idx] = r.start..right.end; + self.uninitialized_ranges.remove(r_idx + 1); + return; + } + } + self.uninitialized_ranges[r_idx] = r.start..(pos + 1); + } else if r.start > pos { + // may still extend range at beginning + if r.start == pos + 1 { + self.uninitialized_ranges[r_idx] = pos..r.end; + } else { + // previous range end must be smaller than idx, therefore no merge possible + self.uninitialized_ranges.push(pos..(pos + 1)); + } + } + } else { + self.uninitialized_ranges.push(pos..(pos + 1)); + } + } +} + +#[cfg(test)] +mod test { + use std::ops::Range; + + type Tracker = super::InitTracker<u32>; + + #[test] + fn check_for_newly_created_tracker() { + let tracker = Tracker::new(10); + assert_eq!(tracker.check(0..10), Some(0..10)); + assert_eq!(tracker.check(0..3), Some(0..3)); + assert_eq!(tracker.check(3..4), Some(3..4)); + assert_eq!(tracker.check(4..10), Some(4..10)); + } + + 
#[test] + fn check_for_drained_tracker() { + let mut tracker = Tracker::new(10); + tracker.drain(0..10); + assert_eq!(tracker.check(0..10), None); + assert_eq!(tracker.check(0..3), None); + assert_eq!(tracker.check(3..4), None); + assert_eq!(tracker.check(4..10), None); + } + + #[test] + fn check_for_partially_filled_tracker() { + let mut tracker = Tracker::new(25); + // Two regions of uninitialized memory + tracker.drain(0..5); + tracker.drain(10..15); + tracker.drain(20..25); + + assert_eq!(tracker.check(0..25), Some(5..25)); // entire range + + assert_eq!(tracker.check(0..5), None); // left non-overlapping + assert_eq!(tracker.check(3..8), Some(5..8)); // left overlapping region + assert_eq!(tracker.check(3..17), Some(5..17)); // left overlapping region + contained region + + // right overlapping region + contained region (yes, doesn't fix range end!) + assert_eq!(tracker.check(8..22), Some(8..22)); + // right overlapping region + assert_eq!(tracker.check(17..22), Some(17..20)); + // right non-overlapping + assert_eq!(tracker.check(20..25), None); + } + + #[test] + fn drain_already_drained() { + let mut tracker = Tracker::new(30); + tracker.drain(10..20); + + // Overlapping with non-cleared + tracker.drain(5..15); // Left overlap + tracker.drain(15..25); // Right overlap + tracker.drain(0..30); // Inner overlap + + // Clear fully cleared + tracker.drain(0..30); + + assert_eq!(tracker.check(0..30), None); + } + + #[test] + fn drain_never_returns_ranges_twice_for_same_range() { + let mut tracker = Tracker::new(19); + assert_eq!(tracker.drain(0..19).count(), 1); + assert_eq!(tracker.drain(0..19).count(), 0); + + let mut tracker = Tracker::new(17); + assert_eq!(tracker.drain(5..8).count(), 1); + assert_eq!(tracker.drain(5..8).count(), 0); + assert_eq!(tracker.drain(1..3).count(), 1); + assert_eq!(tracker.drain(1..3).count(), 0); + assert_eq!(tracker.drain(7..13).count(), 1); + assert_eq!(tracker.drain(7..13).count(), 0); + } + + #[test] + fn 
drain_splits_ranges_correctly() { + let mut tracker = Tracker::new(1337); + assert_eq!( + tracker.drain(21..42).collect::<Vec<Range<u32>>>(), + vec![21..42] + ); + assert_eq!( + tracker.drain(900..1000).collect::<Vec<Range<u32>>>(), + vec![900..1000] + ); + + // Split ranges. + assert_eq!( + tracker.drain(5..1003).collect::<Vec<Range<u32>>>(), + vec![5..21, 42..900, 1000..1003] + ); + assert_eq!( + tracker.drain(0..1337).collect::<Vec<Range<u32>>>(), + vec![0..5, 1003..1337] + ); + } + + #[test] + fn discard_adds_range_on_cleared() { + let mut tracker = Tracker::new(10); + tracker.drain(0..10); + tracker.discard(0); + tracker.discard(5); + tracker.discard(9); + assert_eq!(tracker.check(0..1), Some(0..1)); + assert_eq!(tracker.check(1..5), None); + assert_eq!(tracker.check(5..6), Some(5..6)); + assert_eq!(tracker.check(6..9), None); + assert_eq!(tracker.check(9..10), Some(9..10)); + } + + #[test] + fn discard_does_nothing_on_uncleared() { + let mut tracker = Tracker::new(10); + tracker.discard(0); + tracker.discard(5); + tracker.discard(9); + assert_eq!(tracker.uninitialized_ranges.len(), 1); + assert_eq!(tracker.uninitialized_ranges[0], 0..10); + } + + #[test] + fn discard_extends_ranges() { + let mut tracker = Tracker::new(10); + tracker.drain(3..7); + tracker.discard(2); + tracker.discard(7); + assert_eq!(tracker.uninitialized_ranges.len(), 2); + assert_eq!(tracker.uninitialized_ranges[0], 0..3); + assert_eq!(tracker.uninitialized_ranges[1], 7..10); + } + + #[test] + fn discard_merges_ranges() { + let mut tracker = Tracker::new(10); + tracker.drain(3..4); + tracker.discard(3); + assert_eq!(tracker.uninitialized_ranges.len(), 1); + assert_eq!(tracker.uninitialized_ranges[0], 0..10); + } +} diff --git a/third_party/rust/wgpu-core/src/init_tracker/texture.rs b/third_party/rust/wgpu-core/src/init_tracker/texture.rs new file mode 100644 index 0000000000..a859b5f784 --- /dev/null +++ b/third_party/rust/wgpu-core/src/init_tracker/texture.rs @@ -0,0 +1,103 @@ +use 
super::{InitTracker, MemoryInitKind}; +use crate::{hal_api::HalApi, resource::Texture, track::TextureSelector}; +use arrayvec::ArrayVec; +use std::{ops::Range, sync::Arc}; + +#[derive(Debug, Clone)] +pub(crate) struct TextureInitRange { + pub(crate) mip_range: Range<u32>, + // Strictly array layers. We do *not* track volume slices separately. + pub(crate) layer_range: Range<u32>, +} + +// Returns true if a copy operation doesn't fully cover the texture init +// tracking granularity. I.e. if this function returns true for a pending copy +// operation, the target texture needs to be ensured to be initialized first! +pub(crate) fn has_copy_partial_init_tracker_coverage( + copy_size: &wgt::Extent3d, + mip_level: u32, + desc: &wgt::TextureDescriptor<(), Vec<wgt::TextureFormat>>, +) -> bool { + let target_size = desc.mip_level_size(mip_level).unwrap(); + copy_size.width != target_size.width + || copy_size.height != target_size.height + || (desc.dimension == wgt::TextureDimension::D3 + && copy_size.depth_or_array_layers != target_size.depth_or_array_layers) +} + +impl From<TextureSelector> for TextureInitRange { + fn from(selector: TextureSelector) -> Self { + TextureInitRange { + mip_range: selector.mips, + layer_range: selector.layers, + } + } +} + +#[derive(Debug, Clone)] +pub(crate) struct TextureInitTrackerAction<A: HalApi> { + pub(crate) texture: Arc<Texture<A>>, + pub(crate) range: TextureInitRange, + pub(crate) kind: MemoryInitKind, +} + +pub(crate) type TextureLayerInitTracker = InitTracker<u32>; + +#[derive(Debug)] +pub(crate) struct TextureInitTracker { + pub mips: ArrayVec<TextureLayerInitTracker, { hal::MAX_MIP_LEVELS as usize }>, +} + +impl TextureInitTracker { + pub(crate) fn new(mip_level_count: u32, depth_or_array_layers: u32) -> Self { + TextureInitTracker { + mips: std::iter::repeat(TextureLayerInitTracker::new(depth_or_array_layers)) + .take(mip_level_count as usize) + .collect(), + } + } + + pub(crate) fn check_action<A: HalApi>( + &self, + action: 
&TextureInitTrackerAction<A>, + ) -> Option<TextureInitTrackerAction<A>> { + let mut mip_range_start = std::usize::MAX; + let mut mip_range_end = std::usize::MIN; + let mut layer_range_start = std::u32::MAX; + let mut layer_range_end = std::u32::MIN; + + for (i, mip_tracker) in self + .mips + .iter() + .enumerate() + .take(action.range.mip_range.end as usize) + .skip(action.range.mip_range.start as usize) + { + if let Some(uninitialized_layer_range) = + mip_tracker.check(action.range.layer_range.clone()) + { + mip_range_start = mip_range_start.min(i); + mip_range_end = i + 1; + layer_range_start = layer_range_start.min(uninitialized_layer_range.start); + layer_range_end = layer_range_end.max(uninitialized_layer_range.end); + }; + } + + if mip_range_start < mip_range_end && layer_range_start < layer_range_end { + Some(TextureInitTrackerAction { + texture: action.texture.clone(), + range: TextureInitRange { + mip_range: mip_range_start as u32..mip_range_end as u32, + layer_range: layer_range_start..layer_range_end, + }, + kind: action.kind, + }) + } else { + None + } + } + + pub(crate) fn discard(&mut self, mip_level: u32, layer: u32) { + self.mips[mip_level as usize].discard(layer); + } +} diff --git a/third_party/rust/wgpu-core/src/instance.rs b/third_party/rust/wgpu-core/src/instance.rs new file mode 100644 index 0000000000..582571c2b8 --- /dev/null +++ b/third_party/rust/wgpu-core/src/instance.rs @@ -0,0 +1,1174 @@ +use std::sync::Arc; + +use crate::{ + any_surface::AnySurface, + api_log, + device::{queue::Queue, resource::Device, DeviceDescriptor}, + global::Global, + hal_api::HalApi, + id::markers, + id::{AdapterId, DeviceId, Id, Marker, QueueId, SurfaceId}, + present::Presentation, + resource::{Resource, ResourceInfo, ResourceType}, + resource_log, LabelHelpers, DOWNLEVEL_WARNING_MESSAGE, +}; + +use parking_lot::Mutex; +use wgt::{Backend, Backends, PowerPreference}; + +use hal::{Adapter as _, Instance as _, OpenDevice}; +use thiserror::Error; + +pub type 
RequestAdapterOptions = wgt::RequestAdapterOptions<SurfaceId>; +type HalInstance<A> = <A as hal::Api>::Instance; + +#[derive(Clone, Debug, Error)] +#[error("Limit '{name}' value {requested} is better than allowed {allowed}")] +pub struct FailedLimit { + name: &'static str, + requested: u64, + allowed: u64, +} + +fn check_limits(requested: &wgt::Limits, allowed: &wgt::Limits) -> Vec<FailedLimit> { + let mut failed = Vec::new(); + + requested.check_limits_with_fail_fn(allowed, false, |name, requested, allowed| { + failed.push(FailedLimit { + name, + requested, + allowed, + }) + }); + + failed +} + +#[test] +fn downlevel_default_limits_less_than_default_limits() { + let res = check_limits(&wgt::Limits::downlevel_defaults(), &wgt::Limits::default()); + assert!( + res.is_empty(), + "Downlevel limits are greater than default limits", + ) +} + +#[derive(Default)] +pub struct Instance { + #[allow(dead_code)] + pub name: String, + #[cfg(vulkan)] + pub vulkan: Option<HalInstance<hal::api::Vulkan>>, + #[cfg(metal)] + pub metal: Option<HalInstance<hal::api::Metal>>, + #[cfg(dx12)] + pub dx12: Option<HalInstance<hal::api::Dx12>>, + #[cfg(gles)] + pub gl: Option<HalInstance<hal::api::Gles>>, + pub flags: wgt::InstanceFlags, +} + +impl Instance { + pub fn new(name: &str, instance_desc: wgt::InstanceDescriptor) -> Self { + fn init<A: HalApi>(_: A, instance_desc: &wgt::InstanceDescriptor) -> Option<A::Instance> { + if instance_desc.backends.contains(A::VARIANT.into()) { + let hal_desc = hal::InstanceDescriptor { + name: "wgpu", + flags: instance_desc.flags, + dx12_shader_compiler: instance_desc.dx12_shader_compiler.clone(), + gles_minor_version: instance_desc.gles_minor_version, + }; + match unsafe { hal::Instance::init(&hal_desc) } { + Ok(instance) => { + log::debug!("Instance::new: created {:?} backend", A::VARIANT); + Some(instance) + } + Err(err) => { + log::debug!( + "Instance::new: failed to create {:?} backend: {:?}", + A::VARIANT, + err + ); + None + } + } + } else { + 
log::trace!("Instance::new: backend {:?} not requested", A::VARIANT); + None + } + } + + Self { + name: name.to_string(), + #[cfg(vulkan)] + vulkan: init(hal::api::Vulkan, &instance_desc), + #[cfg(metal)] + metal: init(hal::api::Metal, &instance_desc), + #[cfg(dx12)] + dx12: init(hal::api::Dx12, &instance_desc), + #[cfg(gles)] + gl: init(hal::api::Gles, &instance_desc), + flags: instance_desc.flags, + } + } + + pub(crate) fn destroy_surface(&self, surface: Surface) { + fn destroy<A: HalApi>(instance: &Option<A::Instance>, surface: AnySurface) { + unsafe { + if let Some(suf) = surface.take::<A>() { + instance.as_ref().unwrap().destroy_surface(suf); + } + } + } + match surface.raw.backend() { + #[cfg(vulkan)] + Backend::Vulkan => destroy::<hal::api::Vulkan>(&self.vulkan, surface.raw), + #[cfg(metal)] + Backend::Metal => destroy::<hal::api::Metal>(&self.metal, surface.raw), + #[cfg(dx12)] + Backend::Dx12 => destroy::<hal::api::Dx12>(&self.dx12, surface.raw), + #[cfg(gles)] + Backend::Gl => destroy::<hal::api::Gles>(&self.gl, surface.raw), + _ => unreachable!(), + } + } +} + +pub struct Surface { + pub(crate) presentation: Mutex<Option<Presentation>>, + pub(crate) info: ResourceInfo<Surface>, + pub(crate) raw: AnySurface, +} + +impl Resource for Surface { + const TYPE: ResourceType = "Surface"; + + type Marker = markers::Surface; + + fn as_info(&self) -> &ResourceInfo<Self> { + &self.info + } + + fn as_info_mut(&mut self) -> &mut ResourceInfo<Self> { + &mut self.info + } + + fn label(&self) -> String { + String::from("<Surface>") + } +} + +impl Surface { + pub fn get_capabilities<A: HalApi>( + &self, + adapter: &Adapter<A>, + ) -> Result<hal::SurfaceCapabilities, GetSurfaceSupportError> { + let suf = A::get_surface(self).ok_or(GetSurfaceSupportError::Unsupported)?; + profiling::scope!("surface_capabilities"); + let caps = unsafe { + adapter + .raw + .adapter + .surface_capabilities(suf) + .ok_or(GetSurfaceSupportError::Unsupported)? 
+ }; + + Ok(caps) + } +} + +pub struct Adapter<A: HalApi> { + pub(crate) raw: hal::ExposedAdapter<A>, + pub(crate) info: ResourceInfo<Adapter<A>>, +} + +impl<A: HalApi> Adapter<A> { + fn new(mut raw: hal::ExposedAdapter<A>) -> Self { + // WebGPU requires this offset alignment as lower bound on all adapters. + const MIN_BUFFER_OFFSET_ALIGNMENT_LOWER_BOUND: u32 = 32; + + let limits = &mut raw.capabilities.limits; + + limits.min_uniform_buffer_offset_alignment = limits + .min_uniform_buffer_offset_alignment + .max(MIN_BUFFER_OFFSET_ALIGNMENT_LOWER_BOUND); + limits.min_storage_buffer_offset_alignment = limits + .min_storage_buffer_offset_alignment + .max(MIN_BUFFER_OFFSET_ALIGNMENT_LOWER_BOUND); + + Self { + raw, + info: ResourceInfo::new("<Adapter>"), + } + } + + pub fn is_surface_supported(&self, surface: &Surface) -> bool { + let suf = A::get_surface(surface); + + // If get_surface returns None, then the API does not advertise support for the surface. + // + // This could occur if the user is running their app on Wayland but Vulkan does not support + // VK_KHR_wayland_surface. 
+ match suf { + Some(suf) => unsafe { self.raw.adapter.surface_capabilities(suf) }.is_some(), + None => false, + } + } + + pub(crate) fn get_texture_format_features( + &self, + format: wgt::TextureFormat, + ) -> wgt::TextureFormatFeatures { + use hal::TextureFormatCapabilities as Tfc; + + let caps = unsafe { self.raw.adapter.texture_format_capabilities(format) }; + let mut allowed_usages = wgt::TextureUsages::empty(); + + allowed_usages.set(wgt::TextureUsages::COPY_SRC, caps.contains(Tfc::COPY_SRC)); + allowed_usages.set(wgt::TextureUsages::COPY_DST, caps.contains(Tfc::COPY_DST)); + allowed_usages.set( + wgt::TextureUsages::TEXTURE_BINDING, + caps.contains(Tfc::SAMPLED), + ); + allowed_usages.set( + wgt::TextureUsages::STORAGE_BINDING, + caps.contains(Tfc::STORAGE), + ); + allowed_usages.set( + wgt::TextureUsages::RENDER_ATTACHMENT, + caps.intersects(Tfc::COLOR_ATTACHMENT | Tfc::DEPTH_STENCIL_ATTACHMENT), + ); + + let mut flags = wgt::TextureFormatFeatureFlags::empty(); + flags.set( + wgt::TextureFormatFeatureFlags::STORAGE_READ_WRITE, + caps.contains(Tfc::STORAGE_READ_WRITE), + ); + + flags.set( + wgt::TextureFormatFeatureFlags::FILTERABLE, + caps.contains(Tfc::SAMPLED_LINEAR), + ); + + flags.set( + wgt::TextureFormatFeatureFlags::BLENDABLE, + caps.contains(Tfc::COLOR_ATTACHMENT_BLEND), + ); + + flags.set( + wgt::TextureFormatFeatureFlags::MULTISAMPLE_X2, + caps.contains(Tfc::MULTISAMPLE_X2), + ); + flags.set( + wgt::TextureFormatFeatureFlags::MULTISAMPLE_X4, + caps.contains(Tfc::MULTISAMPLE_X4), + ); + flags.set( + wgt::TextureFormatFeatureFlags::MULTISAMPLE_X8, + caps.contains(Tfc::MULTISAMPLE_X8), + ); + flags.set( + wgt::TextureFormatFeatureFlags::MULTISAMPLE_X16, + caps.contains(Tfc::MULTISAMPLE_X16), + ); + + flags.set( + wgt::TextureFormatFeatureFlags::MULTISAMPLE_RESOLVE, + caps.contains(Tfc::MULTISAMPLE_RESOLVE), + ); + + wgt::TextureFormatFeatures { + allowed_usages, + flags, + } + } + + fn create_device_and_queue_from_hal( + self: &Arc<Self>, + 
hal_device: OpenDevice<A>, + desc: &DeviceDescriptor, + instance_flags: wgt::InstanceFlags, + trace_path: Option<&std::path::Path>, + ) -> Result<(Device<A>, Queue<A>), RequestDeviceError> { + api_log!("Adapter::create_device"); + + if let Ok(device) = Device::new( + hal_device.device, + &hal_device.queue, + self, + desc, + trace_path, + instance_flags, + ) { + let queue = Queue { + device: None, + raw: Some(hal_device.queue), + info: ResourceInfo::new("<Queue>"), + }; + return Ok((device, queue)); + } + Err(RequestDeviceError::OutOfMemory) + } + + fn create_device_and_queue( + self: &Arc<Self>, + desc: &DeviceDescriptor, + instance_flags: wgt::InstanceFlags, + trace_path: Option<&std::path::Path>, + ) -> Result<(Device<A>, Queue<A>), RequestDeviceError> { + // Verify all features were exposed by the adapter + if !self.raw.features.contains(desc.required_features) { + return Err(RequestDeviceError::UnsupportedFeature( + desc.required_features - self.raw.features, + )); + } + + let caps = &self.raw.capabilities; + if wgt::Backends::PRIMARY.contains(wgt::Backends::from(A::VARIANT)) + && !caps.downlevel.is_webgpu_compliant() + { + let missing_flags = wgt::DownlevelFlags::compliant() - caps.downlevel.flags; + log::warn!( + "Missing downlevel flags: {:?}\n{}", + missing_flags, + DOWNLEVEL_WARNING_MESSAGE + ); + log::warn!("{:#?}", caps.downlevel); + } + + // Verify feature preconditions + if desc + .required_features + .contains(wgt::Features::MAPPABLE_PRIMARY_BUFFERS) + && self.raw.info.device_type == wgt::DeviceType::DiscreteGpu + { + log::warn!( + "Feature MAPPABLE_PRIMARY_BUFFERS enabled on a discrete gpu. 
\ + This is a massive performance footgun and likely not what you wanted" + ); + } + + if let Some(_) = desc.label { + //TODO + } + + if let Some(failed) = check_limits(&desc.required_limits, &caps.limits).pop() { + return Err(RequestDeviceError::LimitsExceeded(failed)); + } + + let open = unsafe { + self.raw + .adapter + .open(desc.required_features, &desc.required_limits) + } + .map_err(|err| match err { + hal::DeviceError::Lost => RequestDeviceError::DeviceLost, + hal::DeviceError::OutOfMemory => RequestDeviceError::OutOfMemory, + hal::DeviceError::ResourceCreationFailed => RequestDeviceError::Internal, + })?; + + self.create_device_and_queue_from_hal(open, desc, instance_flags, trace_path) + } +} + +impl<A: HalApi> Resource for Adapter<A> { + const TYPE: ResourceType = "Adapter"; + + type Marker = markers::Adapter; + + fn as_info(&self) -> &ResourceInfo<Self> { + &self.info + } + + fn as_info_mut(&mut self) -> &mut ResourceInfo<Self> { + &mut self.info + } +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum IsSurfaceSupportedError { + #[error("Invalid adapter")] + InvalidAdapter, + #[error("Invalid surface")] + InvalidSurface, +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum GetSurfaceSupportError { + #[error("Invalid adapter")] + InvalidAdapter, + #[error("Invalid surface")] + InvalidSurface, + #[error("Surface is not supported by the adapter")] + Unsupported, +} + +#[derive(Clone, Debug, Error)] +/// Error when requesting a device from the adaptor +#[non_exhaustive] +pub enum RequestDeviceError { + #[error("Parent adapter is invalid")] + InvalidAdapter, + #[error("Connection to device was lost during initialization")] + DeviceLost, + #[error("Device initialization failed due to implementation specific errors")] + Internal, + #[error(transparent)] + LimitsExceeded(#[from] FailedLimit), + #[error("Device has no queue supporting graphics")] + NoGraphicsQueue, + #[error("Not enough memory left to request device")] + OutOfMemory, + 
#[error("Unsupported features were requested: {0:?}")] + UnsupportedFeature(wgt::Features), +} + +pub enum AdapterInputs<'a, M: Marker> { + IdSet(&'a [Id<M>]), + Mask(Backends, fn(Backend) -> Option<Id<M>>), +} + +impl<M: Marker> AdapterInputs<'_, M> { + fn find(&self, b: Backend) -> Option<Option<Id<M>>> { + match *self { + Self::IdSet(ids) => Some(Some(ids.iter().find(|id| id.backend() == b).copied()?)), + Self::Mask(bits, ref fun) => { + if bits.contains(b.into()) { + Some(fun(b)) + } else { + None + } + } + } + } +} + +#[derive(Clone, Debug, Error)] +#[error("Adapter is invalid")] +pub struct InvalidAdapter; + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum RequestAdapterError { + #[error("No suitable adapter found")] + NotFound, + #[error("Surface {0:?} is invalid")] + InvalidSurface(SurfaceId), +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum CreateSurfaceError { + #[error("No backend is available")] + NoSupportedBackend, + #[error(transparent)] + InstanceError(#[from] hal::InstanceError), +} + +impl Global { + /// # Safety + /// + /// - `display_handle` must be a valid object to create a surface upon. + /// - `window_handle` must remain valid as long as the returned + /// [`SurfaceId`] is being used. 
#[cfg(feature = "raw-window-handle")]
pub unsafe fn instance_create_surface(
    &self,
    display_handle: raw_window_handle::RawDisplayHandle,
    window_handle: raw_window_handle::RawWindowHandle,
    id_in: Option<SurfaceId>,
) -> Result<SurfaceId, CreateSurfaceError> {
    profiling::scope!("Instance::create_surface");

    // Helper: if backend `A`'s instance was initialized, try to create a
    // surface on it, type-erasing the result into an `AnySurface`.
    // Returns `None` when the backend instance is absent.
    fn init<A: HalApi>(
        inst: &Option<A::Instance>,
        display_handle: raw_window_handle::RawDisplayHandle,
        window_handle: raw_window_handle::RawWindowHandle,
    ) -> Option<Result<AnySurface, hal::InstanceError>> {
        inst.as_ref().map(|inst| unsafe {
            match inst.create_surface(display_handle, window_handle) {
                Ok(raw) => Ok(AnySurface::new::<A>(raw)),
                Err(e) => Err(e),
            }
        })
    }

    // Probe compiled-in backends in a fixed priority order (Vulkan, Metal,
    // DX12, GLES); the first backend with a live instance wins. A backend
    // that exists but fails surface creation still terminates the probe
    // (its `Err` is kept, not skipped).
    let mut hal_surface: Option<Result<AnySurface, hal::InstanceError>> = None;

    #[cfg(vulkan)]
    if hal_surface.is_none() {
        hal_surface =
            init::<hal::api::Vulkan>(&self.instance.vulkan, display_handle, window_handle);
    }
    #[cfg(metal)]
    if hal_surface.is_none() {
        hal_surface =
            init::<hal::api::Metal>(&self.instance.metal, display_handle, window_handle);
    }
    #[cfg(dx12)]
    if hal_surface.is_none() {
        hal_surface =
            init::<hal::api::Dx12>(&self.instance.dx12, display_handle, window_handle);
    }
    #[cfg(gles)]
    if hal_surface.is_none() {
        hal_surface = init::<hal::api::Gles>(&self.instance.gl, display_handle, window_handle);
    }

    // Outer `?`: `None` means no backend instance was available at all.
    // Inner `?`: a backend-reported `hal::InstanceError`, converted via the
    // `#[from]` on `CreateSurfaceError::InstanceError`.
    let hal_surface = hal_surface.ok_or(CreateSurfaceError::NoSupportedBackend)??;

    let surface = Surface {
        presentation: Mutex::new(None),
        info: ResourceInfo::new("<Surface>"),
        raw: hal_surface,
    };

    // Register the surface, honoring a caller-supplied id when given.
    let (id, _) = self.surfaces.prepare(id_in).assign(surface);
    Ok(id)
}

/// # Safety
///
/// `layer` must be a valid pointer.
+ #[cfg(metal)] + pub unsafe fn instance_create_surface_metal( + &self, + layer: *mut std::ffi::c_void, + id_in: Option<SurfaceId>, + ) -> SurfaceId { + profiling::scope!("Instance::create_surface_metal"); + + let surface = Surface { + presentation: Mutex::new(None), + info: ResourceInfo::new("<Surface>"), + raw: { + let hal_surface = self + .instance + .metal + .as_ref() + .map(|inst| { + // we don't want to link to metal-rs for this + #[allow(clippy::transmute_ptr_to_ref)] + inst.create_surface_from_layer(unsafe { std::mem::transmute(layer) }) + }) + .unwrap(); + AnySurface::new::<hal::api::Metal>(hal_surface) + }, + }; + + let (id, _) = self.surfaces.prepare(id_in).assign(surface); + id + } + + #[cfg(dx12)] + /// # Safety + /// + /// The visual must be valid and able to be used to make a swapchain with. + pub unsafe fn instance_create_surface_from_visual( + &self, + visual: *mut std::ffi::c_void, + id_in: Option<SurfaceId>, + ) -> SurfaceId { + profiling::scope!("Instance::instance_create_surface_from_visual"); + + let surface = Surface { + presentation: Mutex::new(None), + info: ResourceInfo::new("<Surface>"), + raw: { + let hal_surface = self + .instance + .dx12 + .as_ref() + .map(|inst| unsafe { inst.create_surface_from_visual(visual as _) }) + .unwrap(); + AnySurface::new::<hal::api::Dx12>(hal_surface) + }, + }; + + let (id, _) = self.surfaces.prepare(id_in).assign(surface); + id + } + + #[cfg(dx12)] + /// # Safety + /// + /// The surface_handle must be valid and able to be used to make a swapchain with. 
+ pub unsafe fn instance_create_surface_from_surface_handle( + &self, + surface_handle: *mut std::ffi::c_void, + id_in: Option<SurfaceId>, + ) -> SurfaceId { + profiling::scope!("Instance::instance_create_surface_from_surface_handle"); + + let surface = Surface { + presentation: Mutex::new(None), + info: ResourceInfo::new("<Surface>"), + raw: { + let hal_surface = self + .instance + .dx12 + .as_ref() + .map(|inst| unsafe { inst.create_surface_from_surface_handle(surface_handle) }) + .unwrap(); + AnySurface::new::<hal::api::Dx12>(hal_surface) + }, + }; + + let (id, _) = self.surfaces.prepare(id_in).assign(surface); + id + } + + #[cfg(dx12)] + /// # Safety + /// + /// The swap_chain_panel must be valid and able to be used to make a swapchain with. + pub unsafe fn instance_create_surface_from_swap_chain_panel( + &self, + swap_chain_panel: *mut std::ffi::c_void, + id_in: Option<SurfaceId>, + ) -> SurfaceId { + profiling::scope!("Instance::instance_create_surface_from_swap_chain_panel"); + + let surface = Surface { + presentation: Mutex::new(None), + info: ResourceInfo::new("<Surface>"), + raw: { + let hal_surface = self + .instance + .dx12 + .as_ref() + .map(|inst| unsafe { + inst.create_surface_from_swap_chain_panel(swap_chain_panel as _) + }) + .unwrap(); + AnySurface::new::<hal::api::Dx12>(hal_surface) + }, + }; + + let (id, _) = self.surfaces.prepare(id_in).assign(surface); + id + } + + pub fn surface_drop(&self, id: SurfaceId) { + profiling::scope!("Surface::drop"); + + api_log!("Surface::drop {id:?}"); + + fn unconfigure<A: HalApi>(global: &Global, surface: &AnySurface, present: &Presentation) { + let hub = HalApi::hub(global); + if let Some(hal_surface) = surface.downcast_ref::<A>() { + if let Some(device) = present.device.downcast_ref::<A>() { + hub.surface_unconfigure(device, hal_surface); + } + } + } + + let surface = self.surfaces.unregister(id); + if let Some(surface) = Arc::into_inner(surface.unwrap()) { + if let Some(present) = 
surface.presentation.lock().take() { + #[cfg(vulkan)] + unconfigure::<hal::api::Vulkan>(self, &surface.raw, &present); + #[cfg(metal)] + unconfigure::<hal::api::Metal>(self, &surface.raw, &present); + #[cfg(dx12)] + unconfigure::<hal::api::Dx12>(self, &surface.raw, &present); + #[cfg(gles)] + unconfigure::<hal::api::Gles>(self, &surface.raw, &present); + } + + self.instance.destroy_surface(surface); + } else { + panic!("Surface cannot be destroyed because is still in use"); + } + } + + fn enumerate<A: HalApi>( + &self, + _: A, + instance: &Option<A::Instance>, + inputs: &AdapterInputs<markers::Adapter>, + list: &mut Vec<AdapterId>, + ) { + let inst = match *instance { + Some(ref inst) => inst, + None => return, + }; + let id_backend = match inputs.find(A::VARIANT) { + Some(id) => id, + None => return, + }; + + profiling::scope!("enumerating", &*format!("{:?}", A::VARIANT)); + let hub = HalApi::hub(self); + + let hal_adapters = unsafe { inst.enumerate_adapters() }; + for raw in hal_adapters { + let adapter = Adapter::new(raw); + log::info!("Adapter {:?} {:?}", A::VARIANT, adapter.raw.info); + let (id, _) = hub.adapters.prepare(id_backend).assign(adapter); + list.push(id); + } + } + + pub fn enumerate_adapters(&self, inputs: AdapterInputs<markers::Adapter>) -> Vec<AdapterId> { + profiling::scope!("Instance::enumerate_adapters"); + api_log!("Instance::enumerate_adapters"); + + let mut adapters = Vec::new(); + + #[cfg(vulkan)] + self.enumerate( + hal::api::Vulkan, + &self.instance.vulkan, + &inputs, + &mut adapters, + ); + #[cfg(metal)] + self.enumerate( + hal::api::Metal, + &self.instance.metal, + &inputs, + &mut adapters, + ); + #[cfg(dx12)] + self.enumerate(hal::api::Dx12, &self.instance.dx12, &inputs, &mut adapters); + #[cfg(gles)] + self.enumerate(hal::api::Gles, &self.instance.gl, &inputs, &mut adapters); + + adapters + } + + fn select<A: HalApi>( + &self, + selected: &mut usize, + new_id: Option<AdapterId>, + mut list: Vec<hal::ExposedAdapter<A>>, + ) -> 
Option<AdapterId> { + match selected.checked_sub(list.len()) { + Some(left) => { + *selected = left; + None + } + None => { + let adapter = Adapter::new(list.swap_remove(*selected)); + log::info!("Adapter {:?} {:?}", A::VARIANT, adapter.raw.info); + let (id, _) = HalApi::hub(self).adapters.prepare(new_id).assign(adapter); + Some(id) + } + } + } + + pub fn request_adapter( + &self, + desc: &RequestAdapterOptions, + inputs: AdapterInputs<markers::Adapter>, + ) -> Result<AdapterId, RequestAdapterError> { + profiling::scope!("Instance::request_adapter"); + api_log!("Instance::request_adapter"); + + fn gather<A: HalApi>( + _: A, + instance: Option<&A::Instance>, + inputs: &AdapterInputs<markers::Adapter>, + compatible_surface: Option<&Surface>, + force_software: bool, + device_types: &mut Vec<wgt::DeviceType>, + ) -> (Option<Id<markers::Adapter>>, Vec<hal::ExposedAdapter<A>>) { + let id = inputs.find(A::VARIANT); + match (id, instance) { + (Some(id), Some(inst)) => { + let mut adapters = unsafe { inst.enumerate_adapters() }; + if force_software { + adapters.retain(|exposed| exposed.info.device_type == wgt::DeviceType::Cpu); + } + if let Some(surface) = compatible_surface { + let surface = &A::get_surface(surface); + adapters.retain(|exposed| unsafe { + // If the surface does not exist for this backend, + // then the surface is not supported. 
+ surface.is_some() + && exposed + .adapter + .surface_capabilities(surface.unwrap()) + .is_some() + }); + } + device_types.extend(adapters.iter().map(|ad| ad.info.device_type)); + (id, adapters) + } + _ => (None, Vec::new()), + } + } + + let compatible_surface = desc + .compatible_surface + .map(|id| { + self.surfaces + .get(id) + .map_err(|_| RequestAdapterError::InvalidSurface(id)) + }) + .transpose()?; + let compatible_surface = compatible_surface.as_ref().map(|surface| surface.as_ref()); + let mut device_types = Vec::new(); + + #[cfg(vulkan)] + let (id_vulkan, adapters_vk) = gather( + hal::api::Vulkan, + self.instance.vulkan.as_ref(), + &inputs, + compatible_surface, + desc.force_fallback_adapter, + &mut device_types, + ); + #[cfg(metal)] + let (id_metal, adapters_metal) = gather( + hal::api::Metal, + self.instance.metal.as_ref(), + &inputs, + compatible_surface, + desc.force_fallback_adapter, + &mut device_types, + ); + #[cfg(dx12)] + let (id_dx12, adapters_dx12) = gather( + hal::api::Dx12, + self.instance.dx12.as_ref(), + &inputs, + compatible_surface, + desc.force_fallback_adapter, + &mut device_types, + ); + #[cfg(gles)] + let (id_gl, adapters_gl) = gather( + hal::api::Gles, + self.instance.gl.as_ref(), + &inputs, + compatible_surface, + desc.force_fallback_adapter, + &mut device_types, + ); + + if device_types.is_empty() { + return Err(RequestAdapterError::NotFound); + } + + let (mut integrated, mut discrete, mut virt, mut cpu, mut other) = + (None, None, None, None, None); + + for (i, ty) in device_types.into_iter().enumerate() { + match ty { + wgt::DeviceType::IntegratedGpu => { + integrated = integrated.or(Some(i)); + } + wgt::DeviceType::DiscreteGpu => { + discrete = discrete.or(Some(i)); + } + wgt::DeviceType::VirtualGpu => { + virt = virt.or(Some(i)); + } + wgt::DeviceType::Cpu => { + cpu = cpu.or(Some(i)); + } + wgt::DeviceType::Other => { + other = other.or(Some(i)); + } + } + } + + let preferred_gpu = match desc.power_preference { + // Since 
devices of type "Other" might really be "Unknown" and come + // from APIs like OpenGL that don't specify device type, Prefer more + // Specific types over Other. + // + // This means that backends which do provide accurate device types + // will be preferred if their device type indicates an actual + // hardware GPU (integrated or discrete). + PowerPreference::LowPower => integrated.or(discrete).or(other).or(virt).or(cpu), + PowerPreference::HighPerformance => discrete.or(integrated).or(other).or(virt).or(cpu), + PowerPreference::None => { + let option_min = |a: Option<usize>, b: Option<usize>| { + if let (Some(a), Some(b)) = (a, b) { + Some(a.min(b)) + } else { + a.or(b) + } + }; + // Pick the lowest id of these types + option_min(option_min(discrete, integrated), other) + } + }; + + let mut selected = preferred_gpu.unwrap_or(0); + #[cfg(vulkan)] + if let Some(id) = self.select(&mut selected, id_vulkan, adapters_vk) { + return Ok(id); + } + #[cfg(metal)] + if let Some(id) = self.select(&mut selected, id_metal, adapters_metal) { + return Ok(id); + } + #[cfg(dx12)] + if let Some(id) = self.select(&mut selected, id_dx12, adapters_dx12) { + return Ok(id); + } + #[cfg(gles)] + if let Some(id) = self.select(&mut selected, id_gl, adapters_gl) { + return Ok(id); + } + let _ = selected; + + log::warn!("Some adapters are present, but enumerating them failed!"); + Err(RequestAdapterError::NotFound) + } + + /// # Safety + /// + /// `hal_adapter` must be created from this global internal instance handle. 
+ pub unsafe fn create_adapter_from_hal<A: HalApi>( + &self, + hal_adapter: hal::ExposedAdapter<A>, + input: Option<AdapterId>, + ) -> AdapterId { + profiling::scope!("Instance::create_adapter_from_hal"); + + let fid = A::hub(self).adapters.prepare(input); + + let (id, _adapter): (_, Arc<Adapter<A>>) = match A::VARIANT { + #[cfg(vulkan)] + Backend::Vulkan => fid.assign(Adapter::new(hal_adapter)), + #[cfg(metal)] + Backend::Metal => fid.assign(Adapter::new(hal_adapter)), + #[cfg(dx12)] + Backend::Dx12 => fid.assign(Adapter::new(hal_adapter)), + #[cfg(gles)] + Backend::Gl => fid.assign(Adapter::new(hal_adapter)), + _ => unreachable!(), + }; + resource_log!("Created Adapter {:?}", id); + id + } + + pub fn adapter_get_info<A: HalApi>( + &self, + adapter_id: AdapterId, + ) -> Result<wgt::AdapterInfo, InvalidAdapter> { + let hub = A::hub(self); + + hub.adapters + .get(adapter_id) + .map(|adapter| adapter.raw.info.clone()) + .map_err(|_| InvalidAdapter) + } + + pub fn adapter_get_texture_format_features<A: HalApi>( + &self, + adapter_id: AdapterId, + format: wgt::TextureFormat, + ) -> Result<wgt::TextureFormatFeatures, InvalidAdapter> { + let hub = A::hub(self); + + hub.adapters + .get(adapter_id) + .map(|adapter| adapter.get_texture_format_features(format)) + .map_err(|_| InvalidAdapter) + } + + pub fn adapter_features<A: HalApi>( + &self, + adapter_id: AdapterId, + ) -> Result<wgt::Features, InvalidAdapter> { + let hub = A::hub(self); + + hub.adapters + .get(adapter_id) + .map(|adapter| adapter.raw.features) + .map_err(|_| InvalidAdapter) + } + + pub fn adapter_limits<A: HalApi>( + &self, + adapter_id: AdapterId, + ) -> Result<wgt::Limits, InvalidAdapter> { + let hub = A::hub(self); + + hub.adapters + .get(adapter_id) + .map(|adapter| adapter.raw.capabilities.limits.clone()) + .map_err(|_| InvalidAdapter) + } + + pub fn adapter_downlevel_capabilities<A: HalApi>( + &self, + adapter_id: AdapterId, + ) -> Result<wgt::DownlevelCapabilities, InvalidAdapter> { + let hub = 
A::hub(self); + + hub.adapters + .get(adapter_id) + .map(|adapter| adapter.raw.capabilities.downlevel.clone()) + .map_err(|_| InvalidAdapter) + } + + pub fn adapter_get_presentation_timestamp<A: HalApi>( + &self, + adapter_id: AdapterId, + ) -> Result<wgt::PresentationTimestamp, InvalidAdapter> { + let hub = A::hub(self); + + let adapter = hub.adapters.get(adapter_id).map_err(|_| InvalidAdapter)?; + + Ok(unsafe { adapter.raw.adapter.get_presentation_timestamp() }) + } + + pub fn adapter_drop<A: HalApi>(&self, adapter_id: AdapterId) { + profiling::scope!("Adapter::drop"); + api_log!("Adapter::drop {adapter_id:?}"); + + let hub = A::hub(self); + let mut adapters_locked = hub.adapters.write(); + + let free = match adapters_locked.get(adapter_id) { + Ok(adapter) => Arc::strong_count(adapter) == 1, + Err(_) => true, + }; + if free { + hub.adapters + .unregister_locked(adapter_id, &mut *adapters_locked); + } + } +} + +impl Global { + pub fn adapter_request_device<A: HalApi>( + &self, + adapter_id: AdapterId, + desc: &DeviceDescriptor, + trace_path: Option<&std::path::Path>, + device_id_in: Option<DeviceId>, + queue_id_in: Option<QueueId>, + ) -> (DeviceId, QueueId, Option<RequestDeviceError>) { + profiling::scope!("Adapter::request_device"); + api_log!("Adapter::request_device"); + + let hub = A::hub(self); + let device_fid = hub.devices.prepare(device_id_in); + let queue_fid = hub.queues.prepare(queue_id_in); + + let error = loop { + let adapter = match hub.adapters.get(adapter_id) { + Ok(adapter) => adapter, + Err(_) => break RequestDeviceError::InvalidAdapter, + }; + let (device, mut queue) = + match adapter.create_device_and_queue(desc, self.instance.flags, trace_path) { + Ok((device, queue)) => (device, queue), + Err(e) => break e, + }; + let (device_id, _) = device_fid.assign(device); + resource_log!("Created Device {:?}", device_id); + + let device = hub.devices.get(device_id).unwrap(); + queue.device = Some(device.clone()); + + let (queue_id, _) = 
queue_fid.assign(queue); + resource_log!("Created Queue {:?}", queue_id); + + device.queue_id.write().replace(queue_id); + + return (device_id, queue_id, None); + }; + + let device_id = device_fid.assign_error(desc.label.borrow_or_default()); + let queue_id = queue_fid.assign_error(desc.label.borrow_or_default()); + (device_id, queue_id, Some(error)) + } + + /// # Safety + /// + /// - `hal_device` must be created from `adapter_id` or its internal handle. + /// - `desc` must be a subset of `hal_device` features and limits. + pub unsafe fn create_device_from_hal<A: HalApi>( + &self, + adapter_id: AdapterId, + hal_device: OpenDevice<A>, + desc: &DeviceDescriptor, + trace_path: Option<&std::path::Path>, + device_id_in: Option<DeviceId>, + queue_id_in: Option<QueueId>, + ) -> (DeviceId, QueueId, Option<RequestDeviceError>) { + profiling::scope!("Global::create_device_from_hal"); + + let hub = A::hub(self); + let devices_fid = hub.devices.prepare(device_id_in); + let queues_fid = hub.queues.prepare(queue_id_in); + + let error = loop { + let adapter = match hub.adapters.get(adapter_id) { + Ok(adapter) => adapter, + Err(_) => break RequestDeviceError::InvalidAdapter, + }; + let (device, mut queue) = match adapter.create_device_and_queue_from_hal( + hal_device, + desc, + self.instance.flags, + trace_path, + ) { + Ok(device) => device, + Err(e) => break e, + }; + let (device_id, _) = devices_fid.assign(device); + resource_log!("Created Device {:?}", device_id); + + let device = hub.devices.get(device_id).unwrap(); + queue.device = Some(device.clone()); + + let (queue_id, _) = queues_fid.assign(queue); + resource_log!("Created Queue {:?}", queue_id); + + device.queue_id.write().replace(queue_id); + + return (device_id, queue_id, None); + }; + + let device_id = devices_fid.assign_error(desc.label.borrow_or_default()); + let queue_id = queues_fid.assign_error(desc.label.borrow_or_default()); + (device_id, queue_id, Some(error)) + } +} + +/// Generates a set of backends from a 
comma separated list of case-insensitive backend names. +/// +/// Whitespace is stripped, so both 'gl, dx12' and 'gl,dx12' are valid. +/// +/// Always returns WEBGPU on wasm over webgpu. +/// +/// Names: +/// - vulkan = "vulkan" or "vk" +/// - dx12 = "dx12" or "d3d12" +/// - metal = "metal" or "mtl" +/// - gles = "opengl" or "gles" or "gl" +/// - webgpu = "webgpu" +pub fn parse_backends_from_comma_list(string: &str) -> Backends { + let mut backends = Backends::empty(); + for backend in string.to_lowercase().split(',') { + backends |= match backend.trim() { + "vulkan" | "vk" => Backends::VULKAN, + "dx12" | "d3d12" => Backends::DX12, + "metal" | "mtl" => Backends::METAL, + "opengl" | "gles" | "gl" => Backends::GL, + "webgpu" => Backends::BROWSER_WEBGPU, + b => { + log::warn!("unknown backend string '{}'", b); + continue; + } + } + } + + if backends.is_empty() { + log::warn!("no valid backend strings found!"); + } + + backends +} diff --git a/third_party/rust/wgpu-core/src/lib.rs b/third_party/rust/wgpu-core/src/lib.rs new file mode 100644 index 0000000000..9f6526fc11 --- /dev/null +++ b/third_party/rust/wgpu-core/src/lib.rs @@ -0,0 +1,390 @@ +//! This library safely implements WebGPU on native platforms. +//! It is designed for integration into browsers, as well as wrapping +//! into other language-specific user-friendly libraries. +//! +//! ## Feature flags +// NOTE: feature docs. below should be kept in sync. with `Cargo.toml`! +//! +//! - **`api_log_info`** --- Log all API entry points at info instead of trace level. +//! - **`resource_log_info`** --- Log resource lifecycle management at info instead of trace level. +//! - **`link`** _(enabled by default)_ --- Use static linking for libraries. Disable to manually +//! link. Enabled by default. +//! - **`renderdoc`** --- Support the Renderdoc graphics debugger: +//! [https://renderdoc.org/](https://renderdoc.org/) +//! - **`strict_asserts`** --- Apply run-time checks, even in release builds. 
These are in addition +//! to the validation carried out at public APIs in all builds. +//! - **`serde`** --- Enables serialization via `serde` on common wgpu types. +//! - **`trace`** --- Enable API tracing. +//! - **`replay`** --- Enable API replaying +//! - **`wgsl`** --- Enable `ShaderModuleSource::Wgsl` +//! - **`fragile-send-sync-non-atomic-wasm`** --- Implement `Send` and `Sync` on Wasm, but only if +//! atomics are not enabled. +//! +//! WebGL/WebGPU objects can not be shared between threads. However, it can be useful to +//! artificially mark them as `Send` and `Sync` anyways to make it easier to write cross-platform +//! code. This is technically _very_ unsafe in a multithreaded environment, but on a wasm binary +//! compiled without atomics we know we are definitely not in a multithreaded environment. +//! +//! ### Backends, passed through to wgpu-hal +//! +//! - **`metal`** --- Enable the `metal` backend. +//! - **`vulkan`** --- Enable the `vulkan` backend. +//! - **`gles`** --- Enable the `GLES` backend. +//! +//! This is used for all of GLES, OpenGL, and WebGL. +//! - **`dx12`** --- Enable the `dx12` backend. + +// When we have no backends, we end up with a lot of dead or otherwise unreachable code. +#![cfg_attr( + all( + not(all(feature = "vulkan", not(target_arch = "wasm32"))), + not(all(feature = "metal", any(target_os = "macos", target_os = "ios"))), + not(all(feature = "dx12", windows)), + not(feature = "gles"), + ), + allow(unused, clippy::let_and_return) +)] +#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] +#![allow( + // It is much clearer to assert negative conditions with eq! false + clippy::bool_assert_comparison, + // We use loops for getting early-out of scope without closures. + clippy::never_loop, + // We don't use syntax sugar where it's not necessary. + clippy::match_like_matches_macro, + // Redundant matching is more explicit. + clippy::redundant_pattern_matching, + // Explicit lifetimes are often easier to reason about. 
+ clippy::needless_lifetimes, + // No need for defaults in the internal types. + clippy::new_without_default, + // Needless updates are more scalable, easier to play with features. + clippy::needless_update, + // Need many arguments for some core functions to be able to re-use code in many situations. + clippy::too_many_arguments, + // For some reason `rustc` can warn about these in const generics even + // though they are required. + unused_braces, + // It gets in the way a lot and does not prevent bugs in practice. + clippy::pattern_type_mismatch, +)] +#![warn( + trivial_casts, + trivial_numeric_casts, + unsafe_op_in_unsafe_fn, + unused_extern_crates, + unused_qualifications +)] + +pub mod any_surface; +pub mod binding_model; +pub mod command; +mod conv; +pub mod device; +pub mod error; +pub mod global; +pub mod hal_api; +mod hash_utils; +pub mod hub; +pub mod id; +pub mod identity; +mod init_tracker; +pub mod instance; +pub mod pipeline; +mod pool; +pub mod present; +pub mod registry; +pub mod resource; +mod snatch; +pub mod storage; +mod track; +// This is public for users who pre-compile shaders while still wanting to +// preserve all run-time checks that `wgpu-core` does. +// See <https://github.com/gfx-rs/wgpu/issues/3103>, after which this can be +// made private again. +pub mod validation; + +pub use hal::{api, MAX_BIND_GROUPS, MAX_COLOR_ATTACHMENTS, MAX_VERTEX_BUFFERS}; +pub use naga; + +use std::{borrow::Cow, os::raw::c_char}; + +pub(crate) use hash_utils::*; + +/// The index of a queue submission. +/// +/// These are the values stored in `Device::fence`. 
+type SubmissionIndex = hal::FenceValue; + +type Index = u32; +type Epoch = u32; + +pub type RawString = *const c_char; +pub type Label<'a> = Option<Cow<'a, str>>; + +trait LabelHelpers<'a> { + fn borrow_option(&'a self) -> Option<&'a str>; + fn to_hal(&'a self, flags: wgt::InstanceFlags) -> Option<&'a str>; + fn borrow_or_default(&'a self) -> &'a str; +} +impl<'a> LabelHelpers<'a> for Label<'a> { + fn borrow_option(&'a self) -> Option<&'a str> { + self.as_ref().map(|cow| cow.as_ref()) + } + fn to_hal(&'a self, flags: wgt::InstanceFlags) -> Option<&'a str> { + if flags.contains(wgt::InstanceFlags::DISCARD_HAL_LABELS) { + return None; + } + + self.as_ref().map(|cow| cow.as_ref()) + } + fn borrow_or_default(&'a self) -> &'a str { + self.borrow_option().unwrap_or_default() + } +} + +pub fn hal_label(opt: Option<&str>, flags: wgt::InstanceFlags) -> Option<&str> { + if flags.contains(wgt::InstanceFlags::DISCARD_HAL_LABELS) { + return None; + } + + opt +} + +const DOWNLEVEL_WARNING_MESSAGE: &str = "The underlying API or device in use does not \ +support enough features to be a fully compliant implementation of WebGPU. A subset of the features can still be used. \ +If you are running this program on native and not in a browser and wish to limit the features you use to the supported subset, \ +call Adapter::downlevel_properties or Device::downlevel_properties to get a listing of the features the current \ +platform supports."; +const DOWNLEVEL_ERROR_MESSAGE: &str = "This is not an invalid use of WebGPU: the underlying API or device does not \ +support enough features to be a fully compliant implementation. A subset of the features can still be used. \ +If you are running this program on native and not in a browser and wish to work around this issue, call \ +Adapter::downlevel_properties or Device::downlevel_properties to get a listing of the features the current \ +platform supports."; + +// #[cfg] attributes in exported macros are interesting! 
+// +// The #[cfg] conditions in a macro's expansion are evaluated using the +// configuration options (features, target architecture and os, etc.) in force +// where the macro is *used*, not where it is *defined*. That is, if crate A +// defines a macro like this: +// +// #[macro_export] +// macro_rules! if_bleep { +// { } => { +// #[cfg(feature = "bleep")] +// bleep(); +// } +// } +// +// and then crate B uses it like this: +// +// fn f() { +// if_bleep! { } +// } +// +// then it is crate B's `"bleep"` feature, not crate A's, that determines +// whether the macro expands to a function call or an empty statement. The +// entire configuration predicate is evaluated in the use's context, not the +// definition's. +// +// Since `wgpu-core` selects back ends using features, we need to make sure the +// arms of the `gfx_select!` macro are pruned according to `wgpu-core`'s +// features, not those of whatever crate happens to be using `gfx_select!`. This +// means we can't use `#[cfg]` attributes in `gfx_select!`s definition itself. +// Instead, for each backend, `gfx_select!` must use a macro whose definition is +// selected by `#[cfg]` in `wgpu-core`. The configuration predicate is still +// evaluated when the macro is used; we've just moved the `#[cfg]` into a macro +// used by `wgpu-core` itself. + +/// Define an exported macro named `$public` that expands to an expression if +/// the feature `$feature` is enabled, or to a panic otherwise. +/// +/// This is used in the definition of `gfx_select!`, to dispatch the +/// call to the appropriate backend, but panic if that backend was not +/// compiled in. +/// +/// For a call like this: +/// +/// ```ignore +/// define_backend_caller! { name, private, "feature" if cfg_condition } +/// ``` +/// +/// define a macro `name`, used like this: +/// +/// ```ignore +/// name!(expr) +/// ``` +/// +/// that expands to `expr` if `#[cfg(cfg_condition)]` is enabled, or a +/// panic otherwise. 
The panic message complains that `"feature"` is +/// not enabled. +/// +/// Because of odd technical limitations on exporting macros expanded +/// by other macros, you must supply both a public-facing name for the +/// macro and a private name, `$private`, which is never used +/// outside this macro. For details: +/// <https://github.com/rust-lang/rust/pull/52234#issuecomment-976702997> +macro_rules! define_backend_caller { + { $public:ident, $private:ident, $feature:literal if $cfg:meta } => { + #[cfg($cfg)] + #[macro_export] + macro_rules! $private { + ( $call:expr ) => ( $call ) + } + + #[cfg(not($cfg))] + #[macro_export] + macro_rules! $private { + ( $call:expr ) => ( + panic!("Identifier refers to disabled backend feature {:?}", $feature) + ) + } + + // See note about rust-lang#52234 above. + #[doc(hidden)] pub use $private as $public; + } +} + +// Define a macro for each `gfx_select!` match arm. For example, +// +// gfx_if_vulkan!(expr) +// +// expands to `expr` if the `"vulkan"` feature is enabled, or to a panic +// otherwise. +define_backend_caller! { gfx_if_vulkan, gfx_if_vulkan_hidden, "vulkan" if all(feature = "vulkan", not(target_arch = "wasm32")) } +define_backend_caller! { gfx_if_metal, gfx_if_metal_hidden, "metal" if all(feature = "metal", any(target_os = "macos", target_os = "ios")) } +define_backend_caller! { gfx_if_dx12, gfx_if_dx12_hidden, "dx12" if all(feature = "dx12", windows) } +define_backend_caller! { gfx_if_gles, gfx_if_gles_hidden, "gles" if feature = "gles" } +define_backend_caller! { gfx_if_empty, gfx_if_empty_hidden, "empty" if all( + not(any(feature = "metal", feature = "vulkan", feature = "gles")), + any(target_os = "macos", target_os = "ios"), +) } + +/// Dispatch on an [`Id`]'s backend to a backend-generic method. 
+/// +/// Uses of this macro have the form: +/// +/// ```ignore +/// +/// gfx_select!(id => value.method(args...)) +/// +/// ``` +/// +/// This expands to an expression that calls `value.method::<A>(args...)` for +/// the backend `A` selected by `id`. The expansion matches on `id.backend()`, +/// with an arm for each backend type in [`wgpu_types::Backend`] which calls the +/// specialization of `method` for the given backend. This allows resource +/// identifiers to select backends dynamically, even though many `wgpu_core` +/// methods are compiled and optimized for a specific back end. +/// +/// This macro is typically used to call methods on [`wgpu_core::global::Global`], +/// many of which take a single `hal::Api` type parameter. For example, to +/// create a new buffer on the device indicated by `device_id`, one would say: +/// +/// ```ignore +/// gfx_select!(device_id => global.device_create_buffer(device_id, ...)) +/// ``` +/// +/// where the `device_create_buffer` method is defined like this: +/// +/// ```ignore +/// impl Global { +/// pub fn device_create_buffer<A: HalApi>(&self, ...) -> ... +/// { ... } +/// } +/// ``` +/// +/// That `gfx_select!` call uses `device_id`'s backend to select the right +/// backend type `A` for a call to `Global::device_create_buffer<A>`. +/// +/// However, there's nothing about this macro that is specific to `hub::Global`. +/// For example, Firefox's embedding of `wgpu_core` defines its own types with +/// methods that take `hal::Api` type parameters. Firefox uses `gfx_select!` to +/// dynamically dispatch to the right specialization based on the resource's id. +/// +/// [`wgpu_types::Backend`]: wgt::Backend +/// [`wgpu_core::global::Global`]: crate::global::Global +/// [`Id`]: id::Id +#[macro_export] +macro_rules! gfx_select { + // Simple two-component expression, like `self.0.method(..)`. 
+ ($id:expr => $c0:ident.$c1:tt.$method:ident $params:tt) => { + $crate::gfx_select!($id => {$c0.$c1}, $method $params) + }; + + // Simple identifier-only expression, like `global.method(..)`. + ($id:expr => $c0:ident.$method:ident $params:tt) => { + $crate::gfx_select!($id => {$c0}, $method $params) + }; + + ($id:expr => {$($c:tt)*}, $method:ident $params:tt) => { + match $id.backend() { + wgt::Backend::Vulkan => $crate::gfx_if_vulkan!($($c)*.$method::<$crate::api::Vulkan> $params), + wgt::Backend::Metal => $crate::gfx_if_metal!($($c)*.$method::<$crate::api::Metal> $params), + wgt::Backend::Dx12 => $crate::gfx_if_dx12!($($c)*.$method::<$crate::api::Dx12> $params), + wgt::Backend::Gl => $crate::gfx_if_gles!($($c)*.$method::<$crate::api::Gles> $params), + wgt::Backend::Empty => $crate::gfx_if_empty!($($c)*.$method::<$crate::api::Empty> $params), + other => panic!("Unexpected backend {:?}", other), + } + }; +} + +#[cfg(feature = "api_log_info")] +macro_rules! api_log { + ($($arg:tt)+) => (log::info!($($arg)+)) +} +#[cfg(not(feature = "api_log_info"))] +macro_rules! api_log { + ($($arg:tt)+) => (log::trace!($($arg)+)) +} +pub(crate) use api_log; + +#[cfg(feature = "resource_log_info")] +macro_rules! resource_log { + ($($arg:tt)+) => (log::info!($($arg)+)) +} +#[cfg(not(feature = "resource_log_info"))] +macro_rules! 
resource_log {
    ($($arg:tt)+) => (log::trace!($($arg)+))
}
pub(crate) use resource_log;

/// Least common multiple of `a` and `b`.
///
/// Both arguments must be non-zero (a zero argument makes the `a % b` in
/// [`get_greatest_common_divisor`] panic with a divide-by-zero).
#[inline]
pub(crate) fn get_lowest_common_denom(a: u32, b: u32) -> u32 {
    // Euclid's algorithm requires its first argument to be the larger one.
    let gcd = if a >= b {
        get_greatest_common_divisor(a, b)
    } else {
        get_greatest_common_divisor(b, a)
    };
    // Divide before multiplying: `gcd` divides `a` exactly, so `a / gcd * b`
    // equals `a * b / gcd` but cannot hit the intermediate `a * b` overflow
    // (which wraps in release builds and panics in debug builds).
    a / gcd * b
}

/// Greatest common divisor via Euclid's algorithm.
///
/// Requires `a >= b` (asserted) and `b != 0` (otherwise `a % b` panics).
#[inline]
pub(crate) fn get_greatest_common_divisor(mut a: u32, mut b: u32) -> u32 {
    assert!(a >= b);
    loop {
        let c = a % b;
        if c == 0 {
            return b;
        } else {
            a = b;
            b = c;
        }
    }
}

#[test]
fn test_lcd() {
    assert_eq!(get_lowest_common_denom(2, 2), 2);
    assert_eq!(get_lowest_common_denom(2, 3), 6);
    assert_eq!(get_lowest_common_denom(6, 4), 12);
    // Regression: the old `a * b / gcd` order overflowed u32 here.
    assert_eq!(get_lowest_common_denom(1 << 31, 2), 1 << 31);
}

#[test]
fn test_gcd() {
    assert_eq!(get_greatest_common_divisor(5, 1), 1);
    assert_eq!(get_greatest_common_divisor(4, 2), 2);
    assert_eq!(get_greatest_common_divisor(6, 4), 2);
    assert_eq!(get_greatest_common_divisor(7, 7), 7);
}
diff --git a/third_party/rust/wgpu-core/src/pipeline.rs b/third_party/rust/wgpu-core/src/pipeline.rs
new file mode 100644
index 0000000000..acc1b24b0c
--- /dev/null
+++ b/third_party/rust/wgpu-core/src/pipeline.rs
@@ -0,0 +1,561 @@
#[cfg(feature = "trace")]
use crate::device::trace;
use crate::{
    binding_model::{CreateBindGroupLayoutError, CreatePipelineLayoutError, PipelineLayout},
    command::ColorAttachmentError,
    device::{Device, DeviceError, MissingDownlevelFlags, MissingFeatures, RenderPassContext},
    hal_api::HalApi,
    id::{PipelineLayoutId, ShaderModuleId},
    resource::{Resource, ResourceInfo, ResourceType},
    resource_log, validation, Label,
};
use arrayvec::ArrayVec;
use std::{borrow::Cow, error::Error, fmt, marker::PhantomData, num::NonZeroU32, sync::Arc};
use thiserror::Error;

/// Information about buffer bindings, which
/// is validated against the shader (and pipeline)
/// at draw time as opposed to initialization time.
#[derive(Debug)]
pub(crate) struct LateSizedBufferGroup {
    // The order has to match `BindGroup::late_buffer_binding_sizes`.
    pub(crate) shader_sizes: Vec<wgt::BufferAddress>,
}

/// Source text or IR for a shader module, in any of the supported input
/// languages (variants are gated on the corresponding cargo features).
#[allow(clippy::large_enum_variant)]
pub enum ShaderModuleSource<'a> {
    #[cfg(feature = "wgsl")]
    Wgsl(Cow<'a, str>),
    #[cfg(feature = "glsl")]
    Glsl(Cow<'a, str>, naga::front::glsl::Options),
    #[cfg(feature = "spirv")]
    SpirV(Cow<'a, [u32]>, naga::front::spv::Options),
    Naga(Cow<'static, naga::Module>),
    /// Dummy variant because `Naga` doesn't have a lifetime and without enough active features it
    /// could be the last one active.
    #[doc(hidden)]
    Dummy(PhantomData<&'a ()>),
}

#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ShaderModuleDescriptor<'a> {
    pub label: Label<'a>,
    #[cfg_attr(feature = "serde", serde(default))]
    pub shader_bound_checks: wgt::ShaderBoundChecks,
}

/// A compiled shader module, owning the backend object and the reflected
/// `interface` used for pipeline validation.
#[derive(Debug)]
pub struct ShaderModule<A: HalApi> {
    // `Some` until `drop` takes it to destroy the raw backend module.
    pub(crate) raw: Option<A::ShaderModule>,
    pub(crate) device: Arc<Device<A>>,
    pub(crate) interface: Option<validation::Interface>,
    pub(crate) info: ResourceInfo<ShaderModule<A>>,
    pub(crate) label: String,
}

impl<A: HalApi> Drop for ShaderModule<A> {
    // Destroys the raw backend shader module (and records the destruction in
    // the trace, if tracing is enabled).
    fn drop(&mut self) {
        if let Some(raw) = self.raw.take() {
            resource_log!("Destroy raw ShaderModule {:?}", self.info.label());
            #[cfg(feature = "trace")]
            if let Some(t) = self.device.trace.lock().as_mut() {
                t.add(trace::Action::DestroyShaderModule(self.info.id()));
            }
            unsafe {
                use hal::Device;
                self.device.raw().destroy_shader_module(raw);
            }
        }
    }
}

impl<A: HalApi> Resource for ShaderModule<A> {
    const TYPE: ResourceType = "ShaderModule";

    type Marker = crate::id::markers::ShaderModule;

    fn as_info(&self) -> &ResourceInfo<Self> {
        &self.info
    }

    fn as_info_mut(&mut self) -> &mut ResourceInfo<Self> {
        &mut self.info
    }

    fn label(&self) -> String {
        self.label.clone()
    }
}

impl<A: HalApi> ShaderModule<A> {
    /// The raw backend module. Panics if it has already been taken by `drop`.
    pub(crate) fn raw(&self) -> &A::ShaderModule {
        self.raw.as_ref().unwrap()
    }
}

/// A shader front-end or validation error, carrying the shader source and
/// label so the error can be rendered with source context.
#[derive(Clone, Debug)]
pub struct ShaderError<E> {
    pub source: String,
    pub label: Option<String>,
    pub inner: Box<E>,
}
#[cfg(feature = "wgsl")]
impl fmt::Display for ShaderError<naga::front::wgsl::ParseError> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = self.label.as_deref().unwrap_or_default();
        let string = self.inner.emit_to_string(&self.source);
        write!(f, "\nShader '{label}' parsing {string}")
    }
}
#[cfg(feature = "glsl")]
impl fmt::Display for ShaderError<naga::front::glsl::ParseError> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = self.label.as_deref().unwrap_or_default();
        let string = self.inner.emit_to_string(&self.source);
        write!(f, "\nShader '{label}' parsing {string}")
    }
}
#[cfg(feature = "spirv")]
impl fmt::Display for ShaderError<naga::front::spv::Error> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = self.label.as_deref().unwrap_or_default();
        let string = self.inner.emit_to_string(&self.source);
        write!(f, "\nShader '{label}' parsing {string}")
    }
}
impl fmt::Display for ShaderError<naga::WithSpan<naga::valid::ValidationError>> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use codespan_reporting::{
            diagnostic::{Diagnostic, Label},
            files::SimpleFile,
            term,
        };

        let label = self.label.as_deref().unwrap_or_default();
        let files = SimpleFile::new(label, &self.source);
        let config = term::Config::default();
        // Render the diagnostic into an in-memory buffer, with ANSI color
        // codes suppressed.
        let mut writer = term::termcolor::NoColor::new(Vec::new());

        // One primary label per span reported by the validation error.
        let diagnostic = Diagnostic::error().with_labels(
            self.inner
                .spans()
                .map(|&(span, ref desc)| {
                    Label::primary((), span.to_range().unwrap()).with_message(desc.to_owned())
                })
                .collect(),
        );

        term::emit(&mut writer, &config, &files, &diagnostic).expect("cannot write error");

        write!(
            f,
            "\nShader validation {}",
            String::from_utf8_lossy(&writer.into_inner())
        )
    }
}
impl<E> Error for ShaderError<E>
where
    ShaderError<E>: fmt::Display,
    E: Error + 'static,
{
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        Some(&self.inner)
    }
}

//Note: `Clone` would require `WithSpan: Clone`.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum CreateShaderModuleError {
    #[cfg(feature = "wgsl")]
    #[error(transparent)]
    Parsing(#[from] ShaderError<naga::front::wgsl::ParseError>),
    #[cfg(feature = "glsl")]
    #[error(transparent)]
    ParsingGlsl(#[from] ShaderError<naga::front::glsl::ParseError>),
    #[cfg(feature = "spirv")]
    #[error(transparent)]
    ParsingSpirV(#[from] ShaderError<naga::front::spv::Error>),
    #[error("Failed to generate the backend-specific code")]
    Generation,
    #[error(transparent)]
    Device(#[from] DeviceError),
    #[error(transparent)]
    Validation(#[from] ShaderError<naga::WithSpan<naga::valid::ValidationError>>),
    #[error(transparent)]
    MissingFeatures(#[from] MissingFeatures),
    #[error(
        "Shader global {bind:?} uses a group index {group} that exceeds the max_bind_groups limit of {limit}."
    )]
    InvalidGroupIndex {
        bind: naga::ResourceBinding,
        group: u32,
        limit: u32,
    },
}

impl CreateShaderModuleError {
    /// The source location of the error, if the underlying parser or
    /// validator can report one; `None` for all other variants.
    pub fn location(&self, source: &str) -> Option<naga::SourceLocation> {
        match *self {
            #[cfg(feature = "wgsl")]
            CreateShaderModuleError::Parsing(ref err) => err.inner.location(source),
            CreateShaderModuleError::Validation(ref err) => err.inner.location(source),
            _ => None,
        }
    }
}

/// Describes a programmable pipeline stage.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ProgrammableStageDescriptor<'a> {
    /// The compiled shader module for this stage.
    pub module: ShaderModuleId,
    /// The name of the entry point in the compiled shader. There must be a function with this name
    /// in the shader.
    pub entry_point: Cow<'a, str>,
}

/// Number of implicit bind groups derived at pipeline creation.
pub type ImplicitBindGroupCount = u8;

/// Errors raised while deriving a pipeline layout implicitly from a shader.
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum ImplicitLayoutError {
    #[error("Missing IDs for deriving {0} bind groups")]
    MissingIds(ImplicitBindGroupCount),
    #[error("Unable to reflect the shader {0:?} interface")]
    ReflectionError(wgt::ShaderStages),
    #[error(transparent)]
    BindGroup(#[from] CreateBindGroupLayoutError),
    #[error(transparent)]
    Pipeline(#[from] CreatePipelineLayoutError),
}

/// Describes a compute pipeline.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ComputePipelineDescriptor<'a> {
    pub label: Label<'a>,
    /// The layout of bind groups for this pipeline.
    pub layout: Option<PipelineLayoutId>,
    /// The compiled compute stage and its entry point.
    pub stage: ProgrammableStageDescriptor<'a>,
}

#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum CreateComputePipelineError {
    #[error(transparent)]
    Device(#[from] DeviceError),
    #[error("Pipeline layout is invalid")]
    InvalidLayout,
    #[error("Unable to derive an implicit layout")]
    Implicit(#[from] ImplicitLayoutError),
    #[error("Error matching shader requirements against the pipeline")]
    Stage(#[from] validation::StageError),
    #[error("Internal error: {0}")]
    Internal(String),
    #[error(transparent)]
    MissingDownlevelFlags(#[from] MissingDownlevelFlags),
}

/// A compute pipeline, owning the backend object and keeping its layout and
/// shader module alive.
#[derive(Debug)]
pub struct ComputePipeline<A: HalApi> {
    // `Some` until `drop` takes it to destroy the raw backend pipeline.
    pub(crate) raw: Option<A::ComputePipeline>,
    pub(crate) layout: Arc<PipelineLayout<A>>,
    pub(crate) device: Arc<Device<A>>,
    // Held only to keep the shader module alive (leading underscore: not read).
    pub(crate) _shader_module: Arc<ShaderModule<A>>,
    pub(crate) late_sized_buffer_groups: ArrayVec<LateSizedBufferGroup, { hal::MAX_BIND_GROUPS }>,
    pub(crate) info: ResourceInfo<ComputePipeline<A>>,
}

impl<A: HalApi> Drop for ComputePipeline<A> {
    // Destroys the raw backend pipeline (and records the destruction in the
    // trace, if tracing is enabled).
    fn drop(&mut self) {
        if let Some(raw) = self.raw.take() {
            resource_log!("Destroy raw ComputePipeline {:?}", self.info.label());

            #[cfg(feature = "trace")]
            if let Some(t) = self.device.trace.lock().as_mut() {
                t.add(trace::Action::DestroyComputePipeline(self.info.id()));
            }

            unsafe {
                use hal::Device;
                self.device.raw().destroy_compute_pipeline(raw);
            }
        }
    }
}

impl<A: HalApi> Resource for ComputePipeline<A> {
    const TYPE: ResourceType = "ComputePipeline";

    type Marker = crate::id::markers::ComputePipeline;

    fn as_info(&self) -> &ResourceInfo<Self> {
        &self.info
    }

    fn as_info_mut(&mut self) -> &mut ResourceInfo<Self> {
        &mut self.info
    }
}

impl<A: HalApi> ComputePipeline<A> {
    /// The raw backend pipeline. Panics if it has already been taken by `drop`.
    pub(crate) fn raw(&self) -> &A::ComputePipeline {
        self.raw.as_ref().unwrap()
    }
}

/// Describes how the vertex buffer is interpreted.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "camelCase"))]
pub struct VertexBufferLayout<'a> {
    /// The stride, in bytes, between elements of this buffer.
    pub array_stride: wgt::BufferAddress,
    /// How often this vertex buffer is "stepped" forward.
    pub step_mode: wgt::VertexStepMode,
    /// The list of attributes which comprise a single vertex.
    pub attributes: Cow<'a, [wgt::VertexAttribute]>,
}

/// Describes the vertex process in a render pipeline.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct VertexState<'a> {
    /// The compiled vertex stage and its entry point.
    pub stage: ProgrammableStageDescriptor<'a>,
    /// The format of any vertex buffers used with this pipeline.
    pub buffers: Cow<'a, [VertexBufferLayout<'a>]>,
}

/// Describes fragment processing in a render pipeline.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct FragmentState<'a> {
    /// The compiled fragment stage and its entry point.
    pub stage: ProgrammableStageDescriptor<'a>,
    /// The effect of draw calls on the color aspect of the output target.
    pub targets: Cow<'a, [Option<wgt::ColorTargetState>]>,
}

/// Describes a render (graphics) pipeline.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct RenderPipelineDescriptor<'a> {
    pub label: Label<'a>,
    /// The layout of bind groups for this pipeline.
    pub layout: Option<PipelineLayoutId>,
    /// The vertex processing state for this pipeline.
    pub vertex: VertexState<'a>,
    /// The properties of the pipeline at the primitive assembly and rasterization level.
    #[cfg_attr(feature = "serde", serde(default))]
    pub primitive: wgt::PrimitiveState,
    /// The effect of draw calls on the depth and stencil aspects of the output target, if any.
    #[cfg_attr(feature = "serde", serde(default))]
    pub depth_stencil: Option<wgt::DepthStencilState>,
    /// The multi-sampling properties of the pipeline.
    #[cfg_attr(feature = "serde", serde(default))]
    pub multisample: wgt::MultisampleState,
    /// The fragment processing state for this pipeline.
    pub fragment: Option<FragmentState<'a>>,
    /// If the pipeline will be used with a multiview render pass, this indicates how many array
    /// layers the attachments will have.
    pub multiview: Option<NonZeroU32>,
}

/// Errors in a render pipeline's color target state.
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum ColorStateError {
    #[error("Format {0:?} is not renderable")]
    FormatNotRenderable(wgt::TextureFormat),
    #[error("Format {0:?} is not blendable")]
    FormatNotBlendable(wgt::TextureFormat),
    #[error("Format {0:?} does not have a color aspect")]
    FormatNotColor(wgt::TextureFormat),
    #[error("Sample count {0} is not supported by format {1:?} on this device. The WebGPU spec guarantees {2:?} samples are supported by this format. With the TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES feature your device supports {3:?}.")]
    InvalidSampleCount(u32, wgt::TextureFormat, Vec<u32>, Vec<u32>),
    #[error("Output format {pipeline} is incompatible with the shader {shader}")]
    IncompatibleFormat {
        pipeline: validation::NumericType,
        shader: validation::NumericType,
    },
    #[error("Blend factors for {0:?} must be `One`")]
    InvalidMinMaxBlendFactors(wgt::BlendComponent),
    #[error("Invalid write mask {0:?}")]
    InvalidWriteMask(wgt::ColorWrites),
}

/// Errors in a render pipeline's depth/stencil state.
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum DepthStencilStateError {
    #[error("Format {0:?} is not renderable")]
    FormatNotRenderable(wgt::TextureFormat),
    #[error("Format {0:?} does not have a depth aspect, but depth test/write is enabled")]
    FormatNotDepth(wgt::TextureFormat),
    #[error("Format {0:?} does not have a stencil aspect, but stencil test/write is enabled")]
    FormatNotStencil(wgt::TextureFormat),
    #[error("Sample count {0} is not supported by format {1:?} on this device. The WebGPU spec guarantees {2:?} samples are supported by this format. With the TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES feature your device supports {3:?}.")]
    InvalidSampleCount(u32, wgt::TextureFormat, Vec<u32>, Vec<u32>),
}

#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum CreateRenderPipelineError {
    #[error(transparent)]
    ColorAttachment(#[from] ColorAttachmentError),
    #[error(transparent)]
    Device(#[from] DeviceError),
    #[error("Pipeline layout is invalid")]
    InvalidLayout,
    #[error("Unable to derive an implicit layout")]
    Implicit(#[from] ImplicitLayoutError),
    #[error("Color state [{0}] is invalid")]
    ColorState(u8, #[source] ColorStateError),
    #[error("Depth/stencil state is invalid")]
    DepthStencilState(#[from] DepthStencilStateError),
    #[error("Invalid sample count {0}")]
    InvalidSampleCount(u32),
    #[error("The number of vertex buffers {given} exceeds the limit {limit}")]
    TooManyVertexBuffers { given: u32, limit: u32 },
    #[error("The total number of vertex attributes {given} exceeds the limit {limit}")]
    TooManyVertexAttributes { given: u32, limit: u32 },
    #[error("Vertex buffer {index} stride {given} exceeds the limit {limit}")]
    VertexStrideTooLarge { index: u32, given: u32, limit: u32 },
    #[error("Vertex buffer {index} stride {stride} does not respect `VERTEX_STRIDE_ALIGNMENT`")]
    UnalignedVertexStride {
        index: u32,
        stride: wgt::BufferAddress,
    },
    #[error("Vertex attribute at location {location} has invalid offset {offset}")]
    InvalidVertexAttributeOffset {
        location: wgt::ShaderLocation,
        offset: wgt::BufferAddress,
    },
    #[error("Two or more vertex attributes were assigned to the same location in the shader: {0}")]
    ShaderLocationClash(u32),
    #[error("Strip index format was not set to None but to {strip_index_format:?} while using the non-strip topology {topology:?}")]
    StripIndexFormatForNonStripTopology {
        strip_index_format: Option<wgt::IndexFormat>,
        topology: wgt::PrimitiveTopology,
    },
    #[error("Conservative Rasterization is only supported for wgt::PolygonMode::Fill")]
    ConservativeRasterizationNonFillPolygonMode,
    #[error(transparent)]
    MissingFeatures(#[from] MissingFeatures),
    #[error(transparent)]
    MissingDownlevelFlags(#[from] MissingDownlevelFlags),
    #[error("Error matching {stage:?} shader requirements against the pipeline")]
    Stage {
        stage: wgt::ShaderStages,
        #[source]
        error: validation::StageError,
    },
    #[error("Internal error in {stage:?} shader: {error}")]
    Internal {
        stage: wgt::ShaderStages,
        error: String,
    },
    #[error("In the provided shader, the type given for group {group} binding {binding} has a size of {size}. As the device does not support `DownlevelFlags::BUFFER_BINDINGS_NOT_16_BYTE_ALIGNED`, the type must have a size that is a multiple of 16 bytes.")]
    UnalignedShader { group: u32, binding: u32, size: u64 },
    #[error("Using the blend factor {factor:?} for render target {target} is not possible. Only the first render target may be used when dual-source blending.")]
    BlendFactorOnUnsupportedTarget {
        factor: wgt::BlendFactor,
        target: u32,
    },
    #[error("Pipeline expects the shader entry point to make use of dual-source blending.")]
    PipelineExpectsShaderToUseDualSourceBlending,
    #[error("Shader entry point expects the pipeline to make use of dual-source blending.")]
    ShaderExpectsPipelineToUseDualSourceBlending,
}

bitflags::bitflags! {
    // NOTE(review): these appear to record which dynamic state (blend
    // constant, stencil reference) and depth/stencil writes a pipeline uses —
    // confirm against the render-pass code that consumes them.
    #[repr(transparent)]
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct PipelineFlags: u32 {
        const BLEND_CONSTANT = 1 << 0;
        const STENCIL_REFERENCE = 1 << 1;
        const WRITES_DEPTH = 1 << 2;
        const WRITES_STENCIL = 1 << 3;
    }
}

/// How a render pipeline will retrieve attributes from a particular vertex buffer.
#[derive(Clone, Copy, Debug)]
pub struct VertexStep {
    /// The byte stride in the buffer between one attribute value and the next.
    pub stride: wgt::BufferAddress,

    /// The byte size required to fit the last vertex in the stream.
    pub last_stride: wgt::BufferAddress,

    /// Whether the buffer is indexed by vertex number or instance number.
    pub mode: wgt::VertexStepMode,
}

impl Default for VertexStep {
    // Zero strides with per-vertex stepping.
    fn default() -> Self {
        Self {
            stride: 0,
            last_stride: 0,
            mode: wgt::VertexStepMode::Vertex,
        }
    }
}

/// A render pipeline, owning the backend object and keeping its layout and
/// shader modules alive.
#[derive(Debug)]
pub struct RenderPipeline<A: HalApi> {
    // `Some` until `drop` takes it to destroy the raw backend pipeline.
    pub(crate) raw: Option<A::RenderPipeline>,
    pub(crate) device: Arc<Device<A>>,
    pub(crate) layout: Arc<PipelineLayout<A>>,
    // Held only to keep the shader modules alive (leading underscore: not read).
    pub(crate) _shader_modules:
        ArrayVec<Arc<ShaderModule<A>>, { hal::MAX_CONCURRENT_SHADER_STAGES }>,
    pub(crate) pass_context: RenderPassContext,
    pub(crate) flags: PipelineFlags,
    pub(crate) strip_index_format: Option<wgt::IndexFormat>,
    pub(crate) vertex_steps: Vec<VertexStep>,
    pub(crate) late_sized_buffer_groups: ArrayVec<LateSizedBufferGroup, { hal::MAX_BIND_GROUPS }>,
    pub(crate) info: ResourceInfo<RenderPipeline<A>>,
}

impl<A: HalApi> Drop for RenderPipeline<A> {
    // Destroys the raw backend pipeline (and records the destruction in the
    // trace, if tracing is enabled).
    fn drop(&mut self) {
        if let Some(raw) = self.raw.take() {
            resource_log!("Destroy raw RenderPipeline {:?}", self.info.label());

            #[cfg(feature = "trace")]
            if let Some(t) = self.device.trace.lock().as_mut() {
                t.add(trace::Action::DestroyRenderPipeline(self.info.id()));
            }

            unsafe {
                use hal::Device;
                self.device.raw().destroy_render_pipeline(raw);
            }
        }
    }
}

impl<A: HalApi> Resource for RenderPipeline<A> {
    const TYPE: ResourceType = "RenderPipeline";

    type Marker = crate::id::markers::RenderPipeline;

    fn as_info(&self) -> &ResourceInfo<Self> {
        &self.info
    }

    fn as_info_mut(&mut self) -> &mut ResourceInfo<Self> {
        &mut self.info
    }
}

impl<A: HalApi> RenderPipeline<A> {
    /// The raw backend pipeline. Panics if it has already been taken by `drop`.
    pub(crate) fn raw(&self) -> &A::RenderPipeline {
        self.raw.as_ref().unwrap()
    }
}
diff --git a/third_party/rust/wgpu-core/src/pool.rs b/third_party/rust/wgpu-core/src/pool.rs
new file mode 100644
index 0000000000..47de6d5feb
--- /dev/null
+++ b/third_party/rust/wgpu-core/src/pool.rs
@@ -0,0 +1,312 @@
use std::{
    collections::{hash_map::Entry, HashMap},
    hash::Hash,
    sync::{Arc, Weak},
};

use once_cell::sync::OnceCell;
use parking_lot::Mutex;

use crate::{PreHashedKey, PreHashedMap};

type SlotInner<V> = Weak<V>;
type ResourcePoolSlot<V> = Arc<OnceCell<SlotInner<V>>>;

/// A deduplicating pool of shared resources keyed by `K`.
///
/// Slots hold `Weak` references, so the pool itself does not keep resources
/// alive; the resource's `Drop` impl is expected to call [`ResourcePool::remove`].
pub struct ResourcePool<K, V> {
    // We use a pre-hashed map as we never actually need to read the keys.
    //
    // This additionally allows us to not need to hash more than once on get_or_init.
    inner: Mutex<PreHashedMap<K, ResourcePoolSlot<V>>>,
}

impl<K: Clone + Eq + Hash, V> ResourcePool<K, V> {
    /// Creates an empty pool.
    pub fn new() -> Self {
        Self {
            inner: Mutex::new(HashMap::default()),
        }
    }

    /// Get a resource from the pool with the given entry map, or create a new one if it doesn't exist using the given constructor.
    ///
    /// Behaves such that only one resource will be created for each unique entry map at any one time.
    pub fn get_or_init<F, E>(&self, key: K, constructor: F) -> Result<Arc<V>, E>
    where
        F: FnOnce(K) -> Result<Arc<V>, E>,
    {
        // Hash the key outside of the lock.
        let hashed_key = PreHashedKey::from_key(&key);

        // We can't prove at compile time that these will only ever be consumed once,
        // so we need to do the check at runtime.
        let mut key = Some(key);
        let mut constructor = Some(constructor);

        'race: loop {
            let mut map_guard = self.inner.lock();

            let entry = match map_guard.entry(hashed_key) {
                // An entry exists for this resource.
                //
                // We know that either:
                // - The resource is still alive, and Weak::upgrade will succeed.
                // - The resource is in the process of being dropped, and Weak::upgrade will fail.
                //
                // The entry will never be empty while the BGL is still alive.
                Entry::Occupied(entry) => Arc::clone(entry.get()),
                // No entry exists for this resource.
                //
                // We know that the resource is not alive, so we can create a new entry.
                Entry::Vacant(entry) => Arc::clone(entry.insert(Arc::new(OnceCell::new()))),
            };

            // Release the map lock before running the (possibly slow)
            // constructor inside the OnceCell.
            drop(map_guard);

            // Some other thread may beat us to initializing the entry, but OnceCell guarantees that only one thread
            // will actually initialize the entry.
            //
            // We pass the strong reference outside of the closure to keep it alive while we're the only one keeping a reference to it.
            let mut strong = None;
            let weak = entry.get_or_try_init(|| {
                let strong_inner = constructor.take().unwrap()(key.take().unwrap())?;
                let weak = Arc::downgrade(&strong_inner);
                strong = Some(strong_inner);
                Ok(weak)
            })?;

            // If strong is Some, that means we just initialized the entry, so we can just return it.
            if let Some(strong) = strong {
                return Ok(strong);
            }

            // The entry was already initialized by someone else, so we need to try to upgrade it.
            if let Some(strong) = weak.upgrade() {
                // We succeed, the resource is still alive, just return that.
                return Ok(strong);
            }

            // The resource is in the process of being dropped, because upgrade failed. The entry still exists in the map, but it points to nothing.
            //
            // We're in a race with the drop implementation of the resource, so lets just go around again. When we go around again:
            // - If the entry exists, we might need to go around a few more times.
            // - If the entry doesn't exist, we'll create a new one.
            continue 'race;
        }
    }

    /// Remove the given entry map from the pool.
    ///
    /// Must *only* be called in the Drop impl of [`BindGroupLayout`].
    pub fn remove(&self, key: &K) {
        let hashed_key = PreHashedKey::from_key(key);

        let mut map_guard = self.inner.lock();

        // Weak::upgrade will be failing long before this code is called. All threads trying to access the resource will be spinning,
        // waiting for the entry to be removed. It is safe to remove the entry from the map.
        map_guard.remove(&hashed_key);
    }
}

#[cfg(test)]
mod tests {
    use std::sync::{
        atomic::{AtomicU32, Ordering},
        Barrier,
    };

    use super::*;

    // Two gets with the same key must share one resource; after dropping and
    // removing, the constructor must run again.
    #[test]
    fn deduplication() {
        let pool = ResourcePool::<u32, u32>::new();

        let mut counter = 0_u32;

        let arc1 = pool
            .get_or_init::<_, ()>(0, |key| {
                counter += 1;
                Ok(Arc::new(key))
            })
            .unwrap();

        assert_eq!(*arc1, 0);
        assert_eq!(counter, 1);

        let arc2 = pool
            .get_or_init::<_, ()>(0, |key| {
                counter += 1;
                Ok(Arc::new(key))
            })
            .unwrap();

        assert!(Arc::ptr_eq(&arc1, &arc2));
        assert_eq!(*arc2, 0);
        assert_eq!(counter, 1);

        drop(arc1);
        drop(arc2);
        pool.remove(&0);

        let arc3 = pool
            .get_or_init::<_, ()>(0, |key| {
                counter += 1;
                Ok(Arc::new(key))
            })
            .unwrap();

        assert_eq!(*arc3, 0);
        assert_eq!(counter, 2);
    }

    // Test name has "2_threads" in the name so nextest reserves two threads for it.
    #[test]
    fn concurrent_creation_2_threads() {
        struct Resources {
            pool: ResourcePool<u32, u32>,
            counter: AtomicU32,
            barrier: Barrier,
        }

        let resources = Arc::new(Resources {
            pool: ResourcePool::<u32, u32>::new(),
            counter: AtomicU32::new(0),
            barrier: Barrier::new(2),
        });

        // Like all races, this is not inherently guaranteed to work, but in practice it should work fine.
        //
        // To validate the expected order of events, we've put print statements in the code, indicating when each thread is at a certain point.
        // The output will look something like this if the test is working as expected:
        //
        // ```
        // 0: prewait
        // 1: prewait
        // 1: postwait
        // 0: postwait
        // 1: init
        // 1: postget
        // 0: postget
        // ```
        fn thread_inner(idx: u8, resources: &Resources) -> Arc<u32> {
            eprintln!("{idx}: prewait");

            // Once this returns, both threads should hit get_or_init at about the same time,
            // allowing us to actually test concurrent creation.
            //
            // Like all races, this is not inherently guaranteed to work, but in practice it should work fine.
            resources.barrier.wait();

            eprintln!("{idx}: postwait");

            let ret = resources
                .pool
                .get_or_init::<_, ()>(0, |key| {
                    eprintln!("{idx}: init");

                    // Simulate long running constructor, ensuring that both threads will be in get_or_init.
                    std::thread::sleep(std::time::Duration::from_millis(250));

                    resources.counter.fetch_add(1, Ordering::SeqCst);

                    Ok(Arc::new(key))
                })
                .unwrap();

            eprintln!("{idx}: postget");

            ret
        }

        let thread1 = std::thread::spawn({
            let resource_clone = Arc::clone(&resources);
            move || thread_inner(1, &resource_clone)
        });

        let arc0 = thread_inner(0, &resources);

        assert_eq!(resources.counter.load(Ordering::Acquire), 1);

        let arc1 = thread1.join().unwrap();

        assert!(Arc::ptr_eq(&arc0, &arc1));
    }

    // Test name has "2_threads" in the name so nextest reserves two threads for it.
    #[test]
    fn create_while_drop_2_threads() {
        struct Resources {
            pool: ResourcePool<u32, u32>,
            barrier: Barrier,
        }

        let resources = Arc::new(Resources {
            pool: ResourcePool::<u32, u32>::new(),
            barrier: Barrier::new(2),
        });

        // Like all races, this is not inherently guaranteed to work, but in practice it should work fine.
        //
        // To validate the expected order of events, we've put print statements in the code, indicating when each thread is at a certain point.
        // The output will look something like this if the test is working as expected:
        //
        // ```
        // 0: prewait
        // 1: prewait
        // 1: postwait
        // 0: postwait
        // 1: postsleep
        // 1: removal
        // 0: postget
        // ```
        //
        // The last two _may_ be flipped.

        let existing_entry = resources
            .pool
            .get_or_init::<_, ()>(0, |key| Ok(Arc::new(key)))
            .unwrap();

        // Drop the entry, but do _not_ remove it from the pool.
        // This simulates the situation where the resource arc has been dropped, but the Drop implementation
        // has not yet run, which calls remove.
        drop(existing_entry);

        fn thread0_inner(resources: &Resources) {
            eprintln!("0: prewait");
            resources.barrier.wait();

            eprintln!("0: postwait");
            // We try to create a new entry, but the entry already exists.
            //
            // As Arc::upgrade is failing, we will just keep spinning until remove is called.
            resources
                .pool
                .get_or_init::<_, ()>(0, |key| Ok(Arc::new(key)))
                .unwrap();
            eprintln!("0: postget");
        }

        fn thread1_inner(resources: &Resources) {
            eprintln!("1: prewait");
            resources.barrier.wait();

            eprintln!("1: postwait");
            // We wait a little bit, making sure that thread0_inner has started spinning.
            std::thread::sleep(std::time::Duration::from_millis(250));
            eprintln!("1: postsleep");

            // We remove the entry from the pool, allowing thread0_inner to re-create.
            resources.pool.remove(&0);
            eprintln!("1: removal");
        }

        let thread1 = std::thread::spawn({
            let resource_clone = Arc::clone(&resources);
            move || thread1_inner(&resource_clone)
        });

        thread0_inner(&resources);

        thread1.join().unwrap();
    }
}
diff --git a/third_party/rust/wgpu-core/src/present.rs b/third_party/rust/wgpu-core/src/present.rs
new file mode 100644
index 0000000000..4d8e1df73e
--- /dev/null
+++ b/third_party/rust/wgpu-core/src/present.rs
@@ -0,0 +1,441 @@
/*! Presentation.

## Lifecycle

Whenever a submission detects the use of any surface texture, it adds it to the device
tracker for the duration of the submission (temporarily, while recording).
It's added with `UNINITIALIZED` state and transitioned into `empty()` state.
When this texture is presented, we remove it from the device tracker as well as
extract it from the hub.
!*/

use std::{
    borrow::Borrow,
    sync::atomic::{AtomicBool, Ordering},
};

#[cfg(feature = "trace")]
use crate::device::trace::Action;
use crate::{
    conv,
    device::any_device::AnyDevice,
    device::{DeviceError, MissingDownlevelFlags, WaitIdleError},
    global::Global,
    hal_api::HalApi,
    hal_label, id,
    init_tracker::TextureInitTracker,
    resource::{self, ResourceInfo},
    snatch::Snatchable,
    track,
};

use hal::{Queue as _, Surface as _};
use parking_lot::{Mutex, RwLock};
use thiserror::Error;
use wgt::SurfaceStatus as Status;

// How long to wait for a surface texture before giving up, in milliseconds.
const FRAME_TIMEOUT_MS: u32 = 1000;

/// Presentation state attached to a configured surface.
#[derive(Debug)]
pub(crate) struct Presentation {
    pub(crate) device: AnyDevice,
    pub(crate) config: wgt::SurfaceConfiguration<Vec<wgt::TextureFormat>>,
    // The surface texture currently acquired for this surface, if any.
    pub(crate) acquired_texture: Option<id::TextureId>,
}

/// Errors raised when acquiring or presenting a surface texture.
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum SurfaceError {
    #[error("Surface is invalid")]
    Invalid,
    #[error("Surface is not configured for presentation")]
    NotConfigured,
    #[error(transparent)]
    Device(#[from] DeviceError),
    #[error("Surface image is already acquired")]
    AlreadyAcquired,
    #[error("Acquired frame is still referenced")]
    StillReferenced,
}

/// Errors raised when (re)configuring a surface.
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum ConfigureSurfaceError {
    #[error(transparent)]
    Device(#[from] DeviceError),
    #[error("Invalid surface")]
    InvalidSurface,
    #[error("The view format {0:?} is not compatible with texture format {1:?}, only changing srgb-ness is allowed.")]
    InvalidViewFormat(wgt::TextureFormat, wgt::TextureFormat),
    #[error(transparent)]
    MissingDownlevelFlags(#[from] MissingDownlevelFlags),
    #[error("`SurfaceOutput` must be dropped before a new `Surface` is made")]
    PreviousOutputExists,
    #[error("Both `Surface` width and height must be non-zero. Wait to recreate the `Surface` until the window has non-zero area.")]
    ZeroArea,
    #[error("`Surface` width and height must be within the maximum supported texture size. Requested was ({width}, {height}), maximum extent is {max_texture_dimension_2d}.")]
    TooLarge {
        width: u32,
        height: u32,
        max_texture_dimension_2d: u32,
    },
    #[error("Surface does not support the adapter's queue family")]
    UnsupportedQueueFamily,
    #[error("Requested format {requested:?} is not in list of supported formats: {available:?}")]
    UnsupportedFormat {
        requested: wgt::TextureFormat,
        available: Vec<wgt::TextureFormat>,
    },
    #[error("Requested present mode {requested:?} is not in the list of supported present modes: {available:?}")]
    UnsupportedPresentMode {
        requested: wgt::PresentMode,
        available: Vec<wgt::PresentMode>,
    },
    #[error("Requested alpha mode {requested:?} is not in the list of supported alpha modes: {available:?}")]
    UnsupportedAlphaMode {
        requested: wgt::CompositeAlphaMode,
        available: Vec<wgt::CompositeAlphaMode>,
    },
    #[error("Requested usage is not supported")]
    UnsupportedUsage,
    #[error("Gpu got stuck :(")]
    StuckGpu,
}

impl From<WaitIdleError> for ConfigureSurfaceError {
    fn from(e: WaitIdleError) -> Self {
        match e {
            WaitIdleError::Device(d) => ConfigureSurfaceError::Device(d),
            WaitIdleError::WrongSubmissionIndex(..)
=> unreachable!(), + WaitIdleError::StuckGpu => ConfigureSurfaceError::StuckGpu, + } + } +} + +#[repr(C)] +#[derive(Debug)] +pub struct SurfaceOutput { + pub status: Status, + pub texture_id: Option<id::TextureId>, +} + +impl Global { + pub fn surface_get_current_texture<A: HalApi>( + &self, + surface_id: id::SurfaceId, + texture_id_in: Option<id::TextureId>, + ) -> Result<SurfaceOutput, SurfaceError> { + profiling::scope!("SwapChain::get_next_texture"); + + let hub = A::hub(self); + + let fid = hub.textures.prepare(texture_id_in); + + let surface = self + .surfaces + .get(surface_id) + .map_err(|_| SurfaceError::Invalid)?; + + let (device, config) = if let Some(ref present) = *surface.presentation.lock() { + match present.device.downcast_clone::<A>() { + Some(device) => { + if !device.is_valid() { + return Err(DeviceError::Lost.into()); + } + (device, present.config.clone()) + } + None => return Err(SurfaceError::NotConfigured), + } + } else { + return Err(SurfaceError::NotConfigured); + }; + + #[cfg(feature = "trace")] + if let Some(ref mut trace) = *device.trace.lock() { + trace.add(Action::GetSurfaceTexture { + id: fid.id(), + parent_id: surface_id, + }); + } + #[cfg(not(feature = "trace"))] + let _ = device; + + let suf = A::get_surface(surface.as_ref()); + let (texture_id, status) = match unsafe { + suf.unwrap() + .acquire_texture(Some(std::time::Duration::from_millis( + FRAME_TIMEOUT_MS as u64, + ))) + } { + Ok(Some(ast)) => { + let texture_desc = wgt::TextureDescriptor { + label: (), + size: wgt::Extent3d { + width: config.width, + height: config.height, + depth_or_array_layers: 1, + }, + sample_count: 1, + mip_level_count: 1, + format: config.format, + dimension: wgt::TextureDimension::D2, + usage: config.usage, + view_formats: config.view_formats, + }; + let hal_usage = conv::map_texture_usage(config.usage, config.format.into()); + let format_features = wgt::TextureFormatFeatures { + allowed_usages: wgt::TextureUsages::RENDER_ATTACHMENT, + flags: 
wgt::TextureFormatFeatureFlags::MULTISAMPLE_X4 + | wgt::TextureFormatFeatureFlags::MULTISAMPLE_RESOLVE, + }; + let clear_view_desc = hal::TextureViewDescriptor { + label: hal_label( + Some("(wgpu internal) clear surface texture view"), + self.instance.flags, + ), + format: config.format, + dimension: wgt::TextureViewDimension::D2, + usage: hal::TextureUses::COLOR_TARGET, + range: wgt::ImageSubresourceRange::default(), + }; + let clear_view = unsafe { + hal::Device::create_texture_view( + device.raw(), + ast.texture.borrow(), + &clear_view_desc, + ) + } + .map_err(DeviceError::from)?; + + let mut presentation = surface.presentation.lock(); + let present = presentation.as_mut().unwrap(); + let texture = resource::Texture { + inner: Snatchable::new(resource::TextureInner::Surface { + raw: Some(ast.texture), + parent_id: surface_id, + has_work: AtomicBool::new(false), + }), + device: device.clone(), + desc: texture_desc, + hal_usage, + format_features, + initialization_status: RwLock::new(TextureInitTracker::new(1, 1)), + full_range: track::TextureSelector { + layers: 0..1, + mips: 0..1, + }, + info: ResourceInfo::new("<Surface>"), + clear_mode: RwLock::new(resource::TextureClearMode::Surface { + clear_view: Some(clear_view), + }), + views: Mutex::new(Vec::new()), + bind_groups: Mutex::new(Vec::new()), + }; + + let (id, resource) = fid.assign(texture); + log::debug!("Created CURRENT Surface Texture {:?}", id); + + { + // register it in the device tracker as uninitialized + let mut trackers = device.trackers.lock(); + trackers + .textures + .insert_single(id, resource, hal::TextureUses::UNINITIALIZED); + } + + if present.acquired_texture.is_some() { + return Err(SurfaceError::AlreadyAcquired); + } + present.acquired_texture = Some(id); + + let status = if ast.suboptimal { + Status::Suboptimal + } else { + Status::Good + }; + (Some(id), status) + } + Ok(None) => (None, Status::Timeout), + Err(err) => ( + None, + match err { + hal::SurfaceError::Lost => Status::Lost, + 
hal::SurfaceError::Device(err) => { + return Err(DeviceError::from(err).into()); + } + hal::SurfaceError::Outdated => Status::Outdated, + hal::SurfaceError::Other(msg) => { + log::error!("acquire error: {}", msg); + Status::Lost + } + }, + ), + }; + + Ok(SurfaceOutput { status, texture_id }) + } + + pub fn surface_present<A: HalApi>( + &self, + surface_id: id::SurfaceId, + ) -> Result<Status, SurfaceError> { + profiling::scope!("SwapChain::present"); + + let hub = A::hub(self); + + let surface = self + .surfaces + .get(surface_id) + .map_err(|_| SurfaceError::Invalid)?; + + let mut presentation = surface.presentation.lock(); + let present = match presentation.as_mut() { + Some(present) => present, + None => return Err(SurfaceError::NotConfigured), + }; + + let device = present.device.downcast_ref::<A>().unwrap(); + if !device.is_valid() { + return Err(DeviceError::Lost.into()); + } + let queue_id = device.queue_id.read().unwrap(); + let queue = hub.queues.get(queue_id).unwrap(); + + #[cfg(feature = "trace")] + if let Some(ref mut trace) = *device.trace.lock() { + trace.add(Action::Present(surface_id)); + } + + let result = { + let texture_id = present + .acquired_texture + .take() + .ok_or(SurfaceError::AlreadyAcquired)?; + + // The texture ID got added to the device tracker by `submit()`, + // and now we are moving it away. 
+ log::debug!( + "Removing swapchain texture {:?} from the device tracker", + texture_id + ); + device.trackers.lock().textures.remove(texture_id); + + let texture = hub.textures.unregister(texture_id); + if let Some(texture) = texture { + let mut exclusive_snatch_guard = device.snatchable_lock.write(); + let suf = A::get_surface(&surface); + let mut inner = texture.inner_mut(&mut exclusive_snatch_guard); + let inner = inner.as_mut().unwrap(); + + match *inner { + resource::TextureInner::Surface { + ref mut raw, + ref parent_id, + ref has_work, + } => { + if surface_id != *parent_id { + log::error!("Presented frame is from a different surface"); + Err(hal::SurfaceError::Lost) + } else if !has_work.load(Ordering::Relaxed) { + log::error!("No work has been submitted for this frame"); + unsafe { suf.unwrap().discard_texture(raw.take().unwrap()) }; + Err(hal::SurfaceError::Outdated) + } else { + unsafe { + queue + .raw + .as_ref() + .unwrap() + .present(suf.unwrap(), raw.take().unwrap()) + } + } + } + _ => unreachable!(), + } + } else { + Err(hal::SurfaceError::Outdated) //TODO? + } + }; + + log::debug!("Presented. 
End of Frame"); + + match result { + Ok(()) => Ok(Status::Good), + Err(err) => match err { + hal::SurfaceError::Lost => Ok(Status::Lost), + hal::SurfaceError::Device(err) => Err(SurfaceError::from(DeviceError::from(err))), + hal::SurfaceError::Outdated => Ok(Status::Outdated), + hal::SurfaceError::Other(msg) => { + log::error!("acquire error: {}", msg); + Err(SurfaceError::Invalid) + } + }, + } + } + + pub fn surface_texture_discard<A: HalApi>( + &self, + surface_id: id::SurfaceId, + ) -> Result<(), SurfaceError> { + profiling::scope!("SwapChain::discard"); + + let hub = A::hub(self); + + let surface = self + .surfaces + .get(surface_id) + .map_err(|_| SurfaceError::Invalid)?; + let mut presentation = surface.presentation.lock(); + let present = match presentation.as_mut() { + Some(present) => present, + None => return Err(SurfaceError::NotConfigured), + }; + + let device = present.device.downcast_ref::<A>().unwrap(); + if !device.is_valid() { + return Err(DeviceError::Lost.into()); + } + + #[cfg(feature = "trace")] + if let Some(ref mut trace) = *device.trace.lock() { + trace.add(Action::DiscardSurfaceTexture(surface_id)); + } + + { + let texture_id = present + .acquired_texture + .take() + .ok_or(SurfaceError::AlreadyAcquired)?; + + // The texture ID got added to the device tracker by `submit()`, + // and now we are moving it away. 
+ log::debug!( + "Removing swapchain texture {:?} from the device tracker", + texture_id + ); + device.trackers.lock().textures.remove(texture_id); + + let texture = hub.textures.unregister(texture_id); + if let Some(texture) = texture { + let suf = A::get_surface(&surface); + let exclusive_snatch_guard = device.snatchable_lock.write(); + match texture.inner.snatch(exclusive_snatch_guard).unwrap() { + resource::TextureInner::Surface { + mut raw, + parent_id, + has_work: _, + } => { + if surface_id == parent_id { + unsafe { suf.unwrap().discard_texture(raw.take().unwrap()) }; + } else { + log::warn!("Surface texture is outdated"); + } + } + _ => unreachable!(), + } + } + } + + Ok(()) + } +} diff --git a/third_party/rust/wgpu-core/src/registry.rs b/third_party/rust/wgpu-core/src/registry.rs new file mode 100644 index 0000000000..f55809770b --- /dev/null +++ b/third_party/rust/wgpu-core/src/registry.rs @@ -0,0 +1,200 @@ +use std::sync::Arc; + +use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard}; +use wgt::Backend; + +use crate::{ + id::Id, + identity::IdentityManager, + resource::Resource, + storage::{Element, InvalidId, Storage}, +}; + +#[derive(Copy, Clone, Debug, Default, PartialEq, Eq)] +pub struct RegistryReport { + pub num_allocated: usize, + pub num_kept_from_user: usize, + pub num_released_from_user: usize, + pub num_error: usize, + pub element_size: usize, +} + +impl RegistryReport { + pub fn is_empty(&self) -> bool { + self.num_allocated + self.num_kept_from_user == 0 + } +} + +/// Registry is the primary holder of each resource type +/// Every resource is now arcanized so the last arc released +/// will in the end free the memory and release the inner raw resource +/// +/// Registry act as the main entry point to keep resource alive +/// when created and released from user land code +/// +/// A resource may still be alive when released from user land code +/// if it's used in active submission or anyway kept alive from +/// any other dependent 
resource +/// +#[derive(Debug)] +pub struct Registry<T: Resource> { + identity: Arc<IdentityManager<T::Marker>>, + storage: RwLock<Storage<T>>, + backend: Backend, +} + +impl<T: Resource> Registry<T> { + pub(crate) fn new(backend: Backend) -> Self { + Self { + identity: Arc::new(IdentityManager::new()), + storage: RwLock::new(Storage::new()), + backend, + } + } + + pub(crate) fn without_backend() -> Self { + Self::new(Backend::Empty) + } +} + +#[must_use] +pub(crate) struct FutureId<'a, T: Resource> { + id: Id<T::Marker>, + identity: Arc<IdentityManager<T::Marker>>, + data: &'a RwLock<Storage<T>>, +} + +impl<T: Resource> FutureId<'_, T> { + #[allow(dead_code)] + pub fn id(&self) -> Id<T::Marker> { + self.id + } + + pub fn into_id(self) -> Id<T::Marker> { + self.id + } + + pub fn init(&self, mut value: T) -> Arc<T> { + value.as_info_mut().set_id(self.id, &self.identity); + Arc::new(value) + } + + /// Assign a new resource to this ID. + /// + /// Registers it with the registry, and fills out the resource info. + pub fn assign(self, value: T) -> (Id<T::Marker>, Arc<T>) { + let mut data = self.data.write(); + data.insert(self.id, self.init(value)); + (self.id, data.get(self.id).unwrap().clone()) + } + + /// Assign an existing resource to a new ID. + /// + /// Registers it with the registry. + /// + /// This _will_ leak the ID, and it will not be recycled again. + /// See https://github.com/gfx-rs/wgpu/issues/4912. 
+ pub fn assign_existing(self, value: &Arc<T>) -> Id<T::Marker> { + let mut data = self.data.write(); + debug_assert!(!data.contains(self.id)); + data.insert(self.id, value.clone()); + self.id + } + + pub fn assign_error(self, label: &str) -> Id<T::Marker> { + self.data.write().insert_error(self.id, label); + self.id + } +} + +impl<T: Resource> Registry<T> { + pub(crate) fn prepare(&self, id_in: Option<Id<T::Marker>>) -> FutureId<T> { + FutureId { + id: match id_in { + Some(id_in) => { + self.identity.mark_as_used(id_in); + id_in + } + None => self.identity.process(self.backend), + }, + identity: self.identity.clone(), + data: &self.storage, + } + } + + pub(crate) fn request(&self) -> FutureId<T> { + FutureId { + id: self.identity.process(self.backend), + identity: self.identity.clone(), + data: &self.storage, + } + } + pub(crate) fn try_get(&self, id: Id<T::Marker>) -> Result<Option<Arc<T>>, InvalidId> { + self.read().try_get(id).map(|o| o.cloned()) + } + pub(crate) fn get(&self, id: Id<T::Marker>) -> Result<Arc<T>, InvalidId> { + self.read().get_owned(id) + } + pub(crate) fn read<'a>(&'a self) -> RwLockReadGuard<'a, Storage<T>> { + self.storage.read() + } + pub(crate) fn write<'a>(&'a self) -> RwLockWriteGuard<'a, Storage<T>> { + self.storage.write() + } + pub fn unregister_locked(&self, id: Id<T::Marker>, storage: &mut Storage<T>) -> Option<Arc<T>> { + storage.remove(id) + } + pub fn force_replace(&self, id: Id<T::Marker>, mut value: T) { + let mut storage = self.storage.write(); + value.as_info_mut().set_id(id, &self.identity); + storage.force_replace(id, value) + } + pub fn force_replace_with_error(&self, id: Id<T::Marker>, label: &str) { + let mut storage = self.storage.write(); + storage.remove(id); + storage.insert_error(id, label); + } + pub(crate) fn unregister(&self, id: Id<T::Marker>) -> Option<Arc<T>> { + let value = self.storage.write().remove(id); + //Returning None is legal if it's an error ID + value + } + + pub fn label_for_resource(&self, id: 
Id<T::Marker>) -> String { + let guard = self.storage.read(); + + let type_name = guard.kind(); + match guard.get(id) { + Ok(res) => { + let label = res.label(); + if label.is_empty() { + format!("<{}-{:?}>", type_name, id.unzip()) + } else { + label + } + } + Err(_) => format!( + "<Invalid-{} label={}>", + type_name, + guard.label_for_invalid_id(id) + ), + } + } + + pub(crate) fn generate_report(&self) -> RegistryReport { + let storage = self.storage.read(); + let mut report = RegistryReport { + element_size: std::mem::size_of::<T>(), + ..Default::default() + }; + report.num_allocated = self.identity.values.lock().count(); + for element in storage.map.iter() { + match *element { + Element::Occupied(..) => report.num_kept_from_user += 1, + Element::Vacant => report.num_released_from_user += 1, + Element::Error(..) => report.num_error += 1, + } + } + report + } +} diff --git a/third_party/rust/wgpu-core/src/resource.rs b/third_party/rust/wgpu-core/src/resource.rs new file mode 100644 index 0000000000..de5d1868a3 --- /dev/null +++ b/third_party/rust/wgpu-core/src/resource.rs @@ -0,0 +1,1507 @@ +#[cfg(feature = "trace")] +use crate::device::trace; +use crate::{ + binding_model::BindGroup, + device::{ + queue, resource::DeferredDestroy, BufferMapPendingClosure, Device, DeviceError, HostMap, + MissingDownlevelFlags, MissingFeatures, + }, + global::Global, + hal_api::HalApi, + id::{AdapterId, BufferId, DeviceId, Id, Marker, SurfaceId, TextureId}, + identity::IdentityManager, + init_tracker::{BufferInitTracker, TextureInitTracker}, + resource, resource_log, + snatch::{ExclusiveSnatchGuard, SnatchGuard, Snatchable}, + track::TextureSelector, + validation::MissingBufferUsageError, + Label, SubmissionIndex, +}; + +use hal::CommandEncoder; +use parking_lot::{Mutex, RwLock}; +use smallvec::SmallVec; +use thiserror::Error; +use wgt::WasmNotSendSync; + +use std::{ + borrow::Borrow, + fmt::Debug, + iter, mem, + ops::Range, + ptr::NonNull, + sync::{ + atomic::{AtomicBool, 
AtomicUsize, Ordering}, + Arc, Weak, + }, +}; + +/// Information about the wgpu-core resource. +/// +/// Each type representing a `wgpu-core` resource, like [`Device`], +/// [`Buffer`], etc., contains a `ResourceInfo` which contains +/// its latest submission index and label. +/// +/// A resource may need to be retained for any of several reasons: +/// and any lifetime logic will be handled by `Arc<Resource>` refcount +/// +/// - The user may hold a reference to it (via a `wgpu::Buffer`, say). +/// +/// - Other resources may depend on it (a texture view's backing +/// texture, for example). +/// +/// - It may be used by commands sent to the GPU that have not yet +/// finished execution. +/// +/// [`Device`]: crate::device::resource::Device +/// [`Buffer`]: crate::resource::Buffer +#[derive(Debug)] +pub struct ResourceInfo<T: Resource> { + id: Option<Id<T::Marker>>, + identity: Option<Arc<IdentityManager<T::Marker>>>, + /// The index of the last queue submission in which the resource + /// was used. + /// + /// Each queue submission is fenced and assigned an index number + /// sequentially. Thus, when a queue submission completes, we know any + /// resources used in that submission and any lower-numbered submissions are + /// no longer in use by the GPU. + submission_index: AtomicUsize, + + /// The `label` from the descriptor used to create the resource. 
+ pub(crate) label: String, +} + +impl<T: Resource> Drop for ResourceInfo<T> { + fn drop(&mut self) { + if let Some(identity) = self.identity.as_ref() { + let id = self.id.as_ref().unwrap(); + identity.free(*id); + } + } +} + +impl<T: Resource> ResourceInfo<T> { + #[allow(unused_variables)] + pub(crate) fn new(label: &str) -> Self { + Self { + id: None, + identity: None, + submission_index: AtomicUsize::new(0), + label: label.to_string(), + } + } + + pub(crate) fn label(&self) -> &dyn Debug + where + Id<T::Marker>: Debug, + { + if !self.label.is_empty() { + return &self.label; + } + + if let Some(id) = &self.id { + return id; + } + + &"" + } + + pub(crate) fn id(&self) -> Id<T::Marker> { + self.id.unwrap() + } + + pub(crate) fn set_id(&mut self, id: Id<T::Marker>, identity: &Arc<IdentityManager<T::Marker>>) { + self.id = Some(id); + self.identity = Some(identity.clone()); + } + + /// Record that this resource will be used by the queue submission with the + /// given index. + pub(crate) fn use_at(&self, submit_index: SubmissionIndex) { + self.submission_index + .store(submit_index as _, Ordering::Release); + } + + pub(crate) fn submission_index(&self) -> SubmissionIndex { + self.submission_index.load(Ordering::Acquire) as _ + } +} + +pub(crate) type ResourceType = &'static str; + +pub trait Resource: 'static + Sized + WasmNotSendSync { + type Marker: Marker; + const TYPE: ResourceType; + fn as_info(&self) -> &ResourceInfo<Self>; + fn as_info_mut(&mut self) -> &mut ResourceInfo<Self>; + fn label(&self) -> String { + self.as_info().label.clone() + } + fn ref_count(self: &Arc<Self>) -> usize { + Arc::strong_count(self) + } + fn is_unique(self: &Arc<Self>) -> bool { + self.ref_count() == 1 + } + fn is_equal(&self, other: &Self) -> bool { + self.as_info().id().unzip() == other.as_info().id().unzip() + } +} + +/// The status code provided to the buffer mapping callback. +/// +/// This is very similar to `BufferAccessResult`, except that this is FFI-friendly. 
+#[repr(C)] +#[derive(Debug)] +pub enum BufferMapAsyncStatus { + /// The Buffer is successfully mapped, `get_mapped_range` can be called. + /// + /// All other variants of this enum represent failures to map the buffer. + Success, + /// The buffer is already mapped. + /// + /// While this is treated as an error, it does not prevent mapped range from being accessed. + AlreadyMapped, + /// Mapping was already requested. + MapAlreadyPending, + /// An unknown error. + Error, + /// Mapping was aborted (by unmapping or destroying the buffer before mapping + /// happened). + Aborted, + /// The context is Lost. + ContextLost, + /// The buffer is in an invalid state. + Invalid, + /// The range isn't fully contained in the buffer. + InvalidRange, + /// The range isn't properly aligned. + InvalidAlignment, + /// Incompatible usage flags. + InvalidUsageFlags, +} + +#[derive(Debug)] +pub(crate) enum BufferMapState<A: HalApi> { + /// Mapped at creation. + Init { + ptr: NonNull<u8>, + stage_buffer: Arc<Buffer<A>>, + needs_flush: bool, + }, + /// Waiting for GPU to be done before mapping + Waiting(BufferPendingMapping<A>), + /// Mapped + Active { + ptr: NonNull<u8>, + range: hal::MemoryRange, + host: HostMap, + }, + /// Not mapped + Idle, +} + +#[cfg(send_sync)] +unsafe impl<A: HalApi> Send for BufferMapState<A> {} +#[cfg(send_sync)] +unsafe impl<A: HalApi> Sync for BufferMapState<A> {} + +#[repr(C)] +pub struct BufferMapCallbackC { + pub callback: unsafe extern "C" fn(status: BufferMapAsyncStatus, user_data: *mut u8), + pub user_data: *mut u8, +} + +#[cfg(send_sync)] +unsafe impl Send for BufferMapCallbackC {} + +#[derive(Debug)] +pub struct BufferMapCallback { + // We wrap this so creating the enum in the C variant can be unsafe, + // allowing our call function to be safe. 
+ inner: BufferMapCallbackInner, +} + +#[cfg(send_sync)] +type BufferMapCallbackCallback = Box<dyn FnOnce(BufferAccessResult) + Send + 'static>; +#[cfg(not(send_sync))] +type BufferMapCallbackCallback = Box<dyn FnOnce(BufferAccessResult) + 'static>; + +enum BufferMapCallbackInner { + Rust { callback: BufferMapCallbackCallback }, + C { inner: BufferMapCallbackC }, +} + +impl Debug for BufferMapCallbackInner { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match *self { + BufferMapCallbackInner::Rust { callback: _ } => f.debug_struct("Rust").finish(), + BufferMapCallbackInner::C { inner: _ } => f.debug_struct("C").finish(), + } + } +} + +impl BufferMapCallback { + pub fn from_rust(callback: BufferMapCallbackCallback) -> Self { + Self { + inner: BufferMapCallbackInner::Rust { callback }, + } + } + + /// # Safety + /// + /// - The callback pointer must be valid to call with the provided user_data + /// pointer. + /// + /// - Both pointers must point to valid memory until the callback is + /// invoked, which may happen at an unspecified time. + pub unsafe fn from_c(inner: BufferMapCallbackC) -> Self { + Self { + inner: BufferMapCallbackInner::C { inner }, + } + } + + pub(crate) fn call(self, result: BufferAccessResult) { + match self.inner { + BufferMapCallbackInner::Rust { callback } => { + callback(result); + } + // SAFETY: the contract of the call to from_c says that this unsafe is sound. 
+ BufferMapCallbackInner::C { inner } => unsafe { + let status = match result { + Ok(()) => BufferMapAsyncStatus::Success, + Err(BufferAccessError::Device(_)) => BufferMapAsyncStatus::ContextLost, + Err(BufferAccessError::Invalid) | Err(BufferAccessError::Destroyed) => { + BufferMapAsyncStatus::Invalid + } + Err(BufferAccessError::AlreadyMapped) => BufferMapAsyncStatus::AlreadyMapped, + Err(BufferAccessError::MapAlreadyPending) => { + BufferMapAsyncStatus::MapAlreadyPending + } + Err(BufferAccessError::MissingBufferUsage(_)) => { + BufferMapAsyncStatus::InvalidUsageFlags + } + Err(BufferAccessError::UnalignedRange) + | Err(BufferAccessError::UnalignedRangeSize { .. }) + | Err(BufferAccessError::UnalignedOffset { .. }) => { + BufferMapAsyncStatus::InvalidAlignment + } + Err(BufferAccessError::OutOfBoundsUnderrun { .. }) + | Err(BufferAccessError::OutOfBoundsOverrun { .. }) + | Err(BufferAccessError::NegativeRange { .. }) => { + BufferMapAsyncStatus::InvalidRange + } + Err(_) => BufferMapAsyncStatus::Error, + }; + + (inner.callback)(status, inner.user_data); + }, + } + } +} + +#[derive(Debug)] +pub struct BufferMapOperation { + pub host: HostMap, + pub callback: Option<BufferMapCallback>, +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum BufferAccessError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("Buffer map failed")] + Failed, + #[error("Buffer is invalid")] + Invalid, + #[error("Buffer is destroyed")] + Destroyed, + #[error("Buffer is already mapped")] + AlreadyMapped, + #[error("Buffer map is pending")] + MapAlreadyPending, + #[error(transparent)] + MissingBufferUsage(#[from] MissingBufferUsageError), + #[error("Buffer is not mapped")] + NotMapped, + #[error( + "Buffer map range must start aligned to `MAP_ALIGNMENT` and end to `COPY_BUFFER_ALIGNMENT`" + )] + UnalignedRange, + #[error("Buffer offset invalid: offset {offset} must be multiple of 8")] + UnalignedOffset { offset: wgt::BufferAddress }, + #[error("Buffer range 
size invalid: range_size {range_size} must be multiple of 4")] + UnalignedRangeSize { range_size: wgt::BufferAddress }, + #[error("Buffer access out of bounds: index {index} would underrun the buffer (limit: {min})")] + OutOfBoundsUnderrun { + index: wgt::BufferAddress, + min: wgt::BufferAddress, + }, + #[error( + "Buffer access out of bounds: last index {index} would overrun the buffer (limit: {max})" + )] + OutOfBoundsOverrun { + index: wgt::BufferAddress, + max: wgt::BufferAddress, + }, + #[error("Buffer map range start {start} is greater than end {end}")] + NegativeRange { + start: wgt::BufferAddress, + end: wgt::BufferAddress, + }, + #[error("Buffer map aborted")] + MapAborted, +} + +pub type BufferAccessResult = Result<(), BufferAccessError>; + +#[derive(Debug)] +pub(crate) struct BufferPendingMapping<A: HalApi> { + pub range: Range<wgt::BufferAddress>, + pub op: BufferMapOperation, + // hold the parent alive while the mapping is active + pub _parent_buffer: Arc<Buffer<A>>, +} + +pub type BufferDescriptor<'a> = wgt::BufferDescriptor<Label<'a>>; + +#[derive(Debug)] +pub struct Buffer<A: HalApi> { + pub(crate) raw: Snatchable<A::Buffer>, + pub(crate) device: Arc<Device<A>>, + pub(crate) usage: wgt::BufferUsages, + pub(crate) size: wgt::BufferAddress, + pub(crate) initialization_status: RwLock<BufferInitTracker>, + pub(crate) sync_mapped_writes: Mutex<Option<hal::MemoryRange>>, + pub(crate) info: ResourceInfo<Buffer<A>>, + pub(crate) map_state: Mutex<BufferMapState<A>>, + pub(crate) bind_groups: Mutex<Vec<Weak<BindGroup<A>>>>, +} + +impl<A: HalApi> Drop for Buffer<A> { + fn drop(&mut self) { + if let Some(raw) = self.raw.take() { + resource_log!("Destroy raw Buffer (dropped) {:?}", self.info.label()); + + #[cfg(feature = "trace")] + if let Some(t) = self.device.trace.lock().as_mut() { + t.add(trace::Action::DestroyBuffer(self.info.id())); + } + + unsafe { + use hal::Device; + self.device.raw().destroy_buffer(raw); + } + } + } +} + +impl<A: HalApi> Buffer<A> { + 
pub(crate) fn raw(&self, guard: &SnatchGuard) -> Option<&A::Buffer> { + self.raw.get(guard) + } + + pub(crate) fn is_destroyed(&self, guard: &SnatchGuard) -> bool { + self.raw.get(guard).is_none() + } + + // Note: This must not be called while holding a lock. + pub(crate) fn unmap(self: &Arc<Self>) -> Result<(), BufferAccessError> { + if let Some((mut operation, status)) = self.unmap_inner()? { + if let Some(callback) = operation.callback.take() { + callback.call(status); + } + } + + Ok(()) + } + + fn unmap_inner(self: &Arc<Self>) -> Result<Option<BufferMapPendingClosure>, BufferAccessError> { + use hal::Device; + + let device = &self.device; + let snatch_guard = device.snatchable_lock.read(); + let raw_buf = self + .raw(&snatch_guard) + .ok_or(BufferAccessError::Destroyed)?; + let buffer_id = self.info.id(); + log::debug!("Buffer {:?} map state -> Idle", buffer_id); + match mem::replace(&mut *self.map_state.lock(), resource::BufferMapState::Idle) { + resource::BufferMapState::Init { + ptr, + stage_buffer, + needs_flush, + } => { + #[cfg(feature = "trace")] + if let Some(ref mut trace) = *device.trace.lock() { + let data = trace.make_binary("bin", unsafe { + std::slice::from_raw_parts(ptr.as_ptr(), self.size as usize) + }); + trace.add(trace::Action::WriteBuffer { + id: buffer_id, + data, + range: 0..self.size, + queued: true, + }); + } + let _ = ptr; + if needs_flush { + unsafe { + device.raw().flush_mapped_ranges( + stage_buffer.raw(&snatch_guard).unwrap(), + iter::once(0..self.size), + ); + } + } + + self.info + .use_at(device.active_submission_index.load(Ordering::Relaxed) + 1); + let region = wgt::BufferSize::new(self.size).map(|size| hal::BufferCopy { + src_offset: 0, + dst_offset: 0, + size, + }); + let transition_src = hal::BufferBarrier { + buffer: stage_buffer.raw(&snatch_guard).unwrap(), + usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, + }; + let transition_dst = hal::BufferBarrier { + buffer: raw_buf, + usage: 
hal::BufferUses::empty()..hal::BufferUses::COPY_DST, + }; + let mut pending_writes = device.pending_writes.lock(); + let pending_writes = pending_writes.as_mut().unwrap(); + let encoder = pending_writes.activate(); + unsafe { + encoder.transition_buffers( + iter::once(transition_src).chain(iter::once(transition_dst)), + ); + if self.size > 0 { + encoder.copy_buffer_to_buffer( + stage_buffer.raw(&snatch_guard).unwrap(), + raw_buf, + region.into_iter(), + ); + } + } + pending_writes.consume_temp(queue::TempResource::Buffer(stage_buffer)); + pending_writes.dst_buffers.insert(buffer_id, self.clone()); + } + resource::BufferMapState::Idle => { + return Err(BufferAccessError::NotMapped); + } + resource::BufferMapState::Waiting(pending) => { + return Ok(Some((pending.op, Err(BufferAccessError::MapAborted)))); + } + resource::BufferMapState::Active { ptr, range, host } => { + if host == HostMap::Write { + #[cfg(feature = "trace")] + if let Some(ref mut trace) = *device.trace.lock() { + let size = range.end - range.start; + let data = trace.make_binary("bin", unsafe { + std::slice::from_raw_parts(ptr.as_ptr(), size as usize) + }); + trace.add(trace::Action::WriteBuffer { + id: buffer_id, + data, + range: range.clone(), + queued: false, + }); + } + let _ = (ptr, range); + } + unsafe { + device + .raw() + .unmap_buffer(raw_buf) + .map_err(DeviceError::from)? 
+ }; + } + } + Ok(None) + } + + pub(crate) fn destroy(self: &Arc<Self>) -> Result<(), DestroyError> { + let device = &self.device; + let buffer_id = self.info.id(); + + #[cfg(feature = "trace")] + if let Some(ref mut trace) = *device.trace.lock() { + trace.add(trace::Action::FreeBuffer(buffer_id)); + } + + let temp = { + let snatch_guard = device.snatchable_lock.write(); + let raw = match self.raw.snatch(snatch_guard) { + Some(raw) => raw, + None => { + return Err(resource::DestroyError::AlreadyDestroyed); + } + }; + + let bind_groups = { + let mut guard = self.bind_groups.lock(); + std::mem::take(&mut *guard) + }; + + queue::TempResource::DestroyedBuffer(Arc::new(DestroyedBuffer { + raw: Some(raw), + device: Arc::clone(&self.device), + submission_index: self.info.submission_index(), + id: self.info.id.unwrap(), + label: self.info.label.clone(), + bind_groups, + })) + }; + + let mut pending_writes = device.pending_writes.lock(); + let pending_writes = pending_writes.as_mut().unwrap(); + if pending_writes.dst_buffers.contains_key(&buffer_id) { + pending_writes.temp_resources.push(temp); + } else { + let last_submit_index = self.info.submission_index(); + device + .lock_life() + .schedule_resource_destruction(temp, last_submit_index); + } + + Ok(()) + } +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum CreateBufferError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("Failed to map buffer while creating: {0}")] + AccessError(#[from] BufferAccessError), + #[error("Buffers that are mapped at creation have to be aligned to `COPY_BUFFER_ALIGNMENT`")] + UnalignedSize, + #[error("Invalid usage flags {0:?}")] + InvalidUsage(wgt::BufferUsages), + #[error("`MAP` usage can only be combined with the opposite `COPY`, requested {0:?}")] + UsageMismatch(wgt::BufferUsages), + #[error("Buffer size {requested} is greater than the maximum buffer size ({maximum})")] + MaxBufferSize { requested: u64, maximum: u64 }, + #[error(transparent)] + 
MissingDownlevelFlags(#[from] MissingDownlevelFlags), +} + +impl<A: HalApi> Resource for Buffer<A> { + const TYPE: ResourceType = "Buffer"; + + type Marker = crate::id::markers::Buffer; + + fn as_info(&self) -> &ResourceInfo<Self> { + &self.info + } + + fn as_info_mut(&mut self) -> &mut ResourceInfo<Self> { + &mut self.info + } +} + +/// A buffer that has been marked as destroyed and is staged for actual deletion soon. +#[derive(Debug)] +pub struct DestroyedBuffer<A: HalApi> { + raw: Option<A::Buffer>, + device: Arc<Device<A>>, + label: String, + pub(crate) id: BufferId, + pub(crate) submission_index: u64, + bind_groups: Vec<Weak<BindGroup<A>>>, +} + +impl<A: HalApi> DestroyedBuffer<A> { + pub fn label(&self) -> &dyn Debug { + if !self.label.is_empty() { + return &self.label; + } + + &self.id + } +} + +impl<A: HalApi> Drop for DestroyedBuffer<A> { + fn drop(&mut self) { + let mut deferred = self.device.deferred_destroy.lock(); + for bind_group in self.bind_groups.drain(..) { + deferred.push(DeferredDestroy::BindGroup(bind_group)); + } + drop(deferred); + + if let Some(raw) = self.raw.take() { + resource_log!("Destroy raw Buffer (destroyed) {:?}", self.label()); + + #[cfg(feature = "trace")] + if let Some(t) = self.device.trace.lock().as_mut() { + t.add(trace::Action::DestroyBuffer(self.id)); + } + + unsafe { + use hal::Device; + self.device.raw().destroy_buffer(raw); + } + } + } +} + +/// A temporary buffer, consumed by the command that uses it. +/// +/// A [`StagingBuffer`] is designed for one-shot uploads of data to the GPU. It +/// is always created mapped, and the command that uses it destroys the buffer +/// when it is done. +/// +/// [`StagingBuffer`]s can be created with [`queue_create_staging_buffer`] and +/// used with [`queue_write_staging_buffer`]. They are also used internally by +/// operations like [`queue_write_texture`] that need to upload data to the GPU, +/// but that don't belong to any particular wgpu command buffer. 
+/// +/// Used `StagingBuffer`s are accumulated in [`Device::pending_writes`], to be +/// freed once their associated operation's queue submission has finished +/// execution. +/// +/// [`queue_create_staging_buffer`]: Global::queue_create_staging_buffer +/// [`queue_write_staging_buffer`]: Global::queue_write_staging_buffer +/// [`queue_write_texture`]: Global::queue_write_texture +/// [`Device::pending_writes`]: crate::device::Device +#[derive(Debug)] +pub struct StagingBuffer<A: HalApi> { + pub(crate) raw: Mutex<Option<A::Buffer>>, + pub(crate) device: Arc<Device<A>>, + pub(crate) size: wgt::BufferAddress, + pub(crate) is_coherent: bool, + pub(crate) info: ResourceInfo<StagingBuffer<A>>, +} + +impl<A: HalApi> Drop for StagingBuffer<A> { + fn drop(&mut self) { + if let Some(raw) = self.raw.lock().take() { + resource_log!("Destroy raw StagingBuffer {:?}", self.info.label()); + unsafe { + use hal::Device; + self.device.raw().destroy_buffer(raw); + } + } + } +} + +impl<A: HalApi> Resource for StagingBuffer<A> { + const TYPE: ResourceType = "StagingBuffer"; + + type Marker = crate::id::markers::StagingBuffer; + + fn as_info(&self) -> &ResourceInfo<Self> { + &self.info + } + + fn as_info_mut(&mut self) -> &mut ResourceInfo<Self> { + &mut self.info + } + + fn label(&self) -> String { + String::from("<StagingBuffer>") + } +} + +pub type TextureDescriptor<'a> = wgt::TextureDescriptor<Label<'a>, Vec<wgt::TextureFormat>>; + +#[derive(Debug)] +pub(crate) enum TextureInner<A: HalApi> { + Native { + raw: A::Texture, + }, + Surface { + raw: Option<A::SurfaceTexture>, + parent_id: SurfaceId, + has_work: AtomicBool, + }, +} + +impl<A: HalApi> TextureInner<A> { + pub fn raw(&self) -> Option<&A::Texture> { + match self { + Self::Native { raw } => Some(raw), + Self::Surface { raw: Some(tex), .. 
} => Some(tex.borrow()), + _ => None, + } + } +} + +#[derive(Debug)] +pub enum TextureClearMode<A: HalApi> { + BufferCopy, + // View for clear via RenderPass for every subsurface (mip/layer/slice) + RenderPass { + clear_views: SmallVec<[Option<A::TextureView>; 1]>, + is_color: bool, + }, + Surface { + clear_view: Option<A::TextureView>, + }, + // Texture can't be cleared, attempting to do so will cause panic. + // (either because it is impossible for the type of texture or it is being destroyed) + None, +} + +#[derive(Debug)] +pub struct Texture<A: HalApi> { + pub(crate) inner: Snatchable<TextureInner<A>>, + pub(crate) device: Arc<Device<A>>, + pub(crate) desc: wgt::TextureDescriptor<(), Vec<wgt::TextureFormat>>, + pub(crate) hal_usage: hal::TextureUses, + pub(crate) format_features: wgt::TextureFormatFeatures, + pub(crate) initialization_status: RwLock<TextureInitTracker>, + pub(crate) full_range: TextureSelector, + pub(crate) info: ResourceInfo<Texture<A>>, + pub(crate) clear_mode: RwLock<TextureClearMode<A>>, + pub(crate) views: Mutex<Vec<Weak<TextureView<A>>>>, + pub(crate) bind_groups: Mutex<Vec<Weak<BindGroup<A>>>>, +} + +impl<A: HalApi> Drop for Texture<A> { + fn drop(&mut self) { + resource_log!("Destroy raw Texture {:?}", self.info.label()); + use hal::Device; + let mut clear_mode = self.clear_mode.write(); + let clear_mode = &mut *clear_mode; + match *clear_mode { + TextureClearMode::Surface { + ref mut clear_view, .. + } => { + if let Some(view) = clear_view.take() { + unsafe { + self.device.raw().destroy_texture_view(view); + } + } + } + TextureClearMode::RenderPass { + ref mut clear_views, + .. 
+ } => { + clear_views.iter_mut().for_each(|clear_view| { + if let Some(view) = clear_view.take() { + unsafe { + self.device.raw().destroy_texture_view(view); + } + } + }); + } + _ => {} + }; + + if let Some(TextureInner::Native { raw }) = self.inner.take() { + #[cfg(feature = "trace")] + if let Some(t) = self.device.trace.lock().as_mut() { + t.add(trace::Action::DestroyTexture(self.info.id())); + } + + unsafe { + self.device.raw().destroy_texture(raw); + } + } + } +} + +impl<A: HalApi> Texture<A> { + pub(crate) fn raw<'a>(&'a self, snatch_guard: &'a SnatchGuard) -> Option<&'a A::Texture> { + self.inner.get(snatch_guard)?.raw() + } + + pub(crate) fn is_destroyed(&self, guard: &SnatchGuard) -> bool { + self.inner.get(guard).is_none() + } + + pub(crate) fn inner_mut<'a>( + &'a self, + guard: &mut ExclusiveSnatchGuard, + ) -> Option<&'a mut TextureInner<A>> { + self.inner.get_mut(guard) + } + pub(crate) fn get_clear_view<'a>( + clear_mode: &'a TextureClearMode<A>, + desc: &'a wgt::TextureDescriptor<(), Vec<wgt::TextureFormat>>, + mip_level: u32, + depth_or_layer: u32, + ) -> &'a A::TextureView { + match *clear_mode { + TextureClearMode::BufferCopy => { + panic!("Given texture is cleared with buffer copies, not render passes") + } + TextureClearMode::None => { + panic!("Given texture can't be cleared") + } + TextureClearMode::Surface { ref clear_view, .. } => clear_view.as_ref().unwrap(), + TextureClearMode::RenderPass { + ref clear_views, .. 
+ } => { + let index = if desc.dimension == wgt::TextureDimension::D3 { + (0..mip_level).fold(0, |acc, mip| { + acc + (desc.size.depth_or_array_layers >> mip).max(1) + }) + } else { + mip_level * desc.size.depth_or_array_layers + } + depth_or_layer; + clear_views[index as usize].as_ref().unwrap() + } + } + } + + pub(crate) fn destroy(self: &Arc<Self>) -> Result<(), DestroyError> { + let device = &self.device; + let texture_id = self.info.id(); + + #[cfg(feature = "trace")] + if let Some(ref mut trace) = *device.trace.lock() { + trace.add(trace::Action::FreeTexture(texture_id)); + } + + let temp = { + let snatch_guard = device.snatchable_lock.write(); + let raw = match self.inner.snatch(snatch_guard) { + Some(TextureInner::Native { raw }) => raw, + Some(TextureInner::Surface { .. }) => { + return Ok(()); + } + None => { + return Err(resource::DestroyError::AlreadyDestroyed); + } + }; + + let views = { + let mut guard = self.views.lock(); + std::mem::take(&mut *guard) + }; + + let bind_groups = { + let mut guard = self.bind_groups.lock(); + std::mem::take(&mut *guard) + }; + + queue::TempResource::DestroyedTexture(Arc::new(DestroyedTexture { + raw: Some(raw), + views, + bind_groups, + device: Arc::clone(&self.device), + submission_index: self.info.submission_index(), + id: self.info.id.unwrap(), + label: self.info.label.clone(), + })) + }; + + let mut pending_writes = device.pending_writes.lock(); + let pending_writes = pending_writes.as_mut().unwrap(); + if pending_writes.dst_textures.contains_key(&texture_id) { + pending_writes.temp_resources.push(temp); + } else { + let last_submit_index = self.info.submission_index(); + device + .lock_life() + .schedule_resource_destruction(temp, last_submit_index); + } + + Ok(()) + } +} + +impl Global { + /// # Safety + /// + /// - The raw texture handle must not be manually destroyed + pub unsafe fn texture_as_hal<A: HalApi, F: FnOnce(Option<&A::Texture>)>( + &self, + id: TextureId, + hal_texture_callback: F, + ) { + 
profiling::scope!("Texture::as_hal"); + + let hub = A::hub(self); + let texture_opt = { hub.textures.try_get(id).ok().flatten() }; + let texture = texture_opt.as_ref().unwrap(); + let snatch_guard = texture.device.snatchable_lock.read(); + let hal_texture = texture.raw(&snatch_guard); + + hal_texture_callback(hal_texture); + } + + /// # Safety + /// + /// - The raw adapter handle must not be manually destroyed + pub unsafe fn adapter_as_hal<A: HalApi, F: FnOnce(Option<&A::Adapter>) -> R, R>( + &self, + id: AdapterId, + hal_adapter_callback: F, + ) -> R { + profiling::scope!("Adapter::as_hal"); + + let hub = A::hub(self); + let adapter = hub.adapters.try_get(id).ok().flatten(); + let hal_adapter = adapter.as_ref().map(|adapter| &adapter.raw.adapter); + + hal_adapter_callback(hal_adapter) + } + + /// # Safety + /// + /// - The raw device handle must not be manually destroyed + pub unsafe fn device_as_hal<A: HalApi, F: FnOnce(Option<&A::Device>) -> R, R>( + &self, + id: DeviceId, + hal_device_callback: F, + ) -> R { + profiling::scope!("Device::as_hal"); + + let hub = A::hub(self); + let device = hub.devices.try_get(id).ok().flatten(); + let hal_device = device.as_ref().map(|device| device.raw()); + + hal_device_callback(hal_device) + } + + /// # Safety + /// + /// - The raw fence handle must not be manually destroyed + pub unsafe fn device_fence_as_hal<A: HalApi, F: FnOnce(Option<&A::Fence>) -> R, R>( + &self, + id: DeviceId, + hal_fence_callback: F, + ) -> R { + profiling::scope!("Device::fence_as_hal"); + + let hub = A::hub(self); + let device = hub.devices.try_get(id).ok().flatten(); + let hal_fence = device.as_ref().map(|device| device.fence.read()); + + hal_fence_callback(hal_fence.as_deref().unwrap().as_ref()) + } + + /// # Safety + /// - The raw surface handle must not be manually destroyed + pub unsafe fn surface_as_hal<A: HalApi, F: FnOnce(Option<&A::Surface>) -> R, R>( + &self, + id: SurfaceId, + hal_surface_callback: F, + ) -> R { + 
profiling::scope!("Surface::as_hal"); + + let surface = self.surfaces.get(id).ok(); + let hal_surface = surface.as_ref().and_then(|surface| A::get_surface(surface)); + + hal_surface_callback(hal_surface) + } +} + +/// A texture that has been marked as destroyed and is staged for actual deletion soon. +#[derive(Debug)] +pub struct DestroyedTexture<A: HalApi> { + raw: Option<A::Texture>, + views: Vec<Weak<TextureView<A>>>, + bind_groups: Vec<Weak<BindGroup<A>>>, + device: Arc<Device<A>>, + label: String, + pub(crate) id: TextureId, + pub(crate) submission_index: u64, +} + +impl<A: HalApi> DestroyedTexture<A> { + pub fn label(&self) -> &dyn Debug { + if !self.label.is_empty() { + return &self.label; + } + + &self.id + } +} + +impl<A: HalApi> Drop for DestroyedTexture<A> { + fn drop(&mut self) { + let device = &self.device; + + let mut deferred = device.deferred_destroy.lock(); + for view in self.views.drain(..) { + deferred.push(DeferredDestroy::TextureView(view)); + } + for bind_group in self.bind_groups.drain(..) 
{ + deferred.push(DeferredDestroy::BindGroup(bind_group)); + } + drop(deferred); + + if let Some(raw) = self.raw.take() { + resource_log!("Destroy raw Texture (destroyed) {:?}", self.label()); + + #[cfg(feature = "trace")] + if let Some(t) = self.device.trace.lock().as_mut() { + t.add(trace::Action::DestroyTexture(self.id)); + } + + unsafe { + use hal::Device; + self.device.raw().destroy_texture(raw); + } + } + } +} + +#[derive(Clone, Copy, Debug)] +pub enum TextureErrorDimension { + X, + Y, + Z, +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum TextureDimensionError { + #[error("Dimension {0:?} is zero")] + Zero(TextureErrorDimension), + #[error("Dimension {dim:?} value {given} exceeds the limit of {limit}")] + LimitExceeded { + dim: TextureErrorDimension, + given: u32, + limit: u32, + }, + #[error("Sample count {0} is invalid")] + InvalidSampleCount(u32), + #[error("Width {width} is not a multiple of {format:?}'s block width ({block_width})")] + NotMultipleOfBlockWidth { + width: u32, + block_width: u32, + format: wgt::TextureFormat, + }, + #[error("Height {height} is not a multiple of {format:?}'s block height ({block_height})")] + NotMultipleOfBlockHeight { + height: u32, + block_height: u32, + format: wgt::TextureFormat, + }, + #[error( + "Width {width} is not a multiple of {format:?}'s width multiple requirement ({multiple})" + )] + WidthNotMultipleOf { + width: u32, + multiple: u32, + format: wgt::TextureFormat, + }, + #[error("Height {height} is not a multiple of {format:?}'s height multiple requirement ({multiple})")] + HeightNotMultipleOf { + height: u32, + multiple: u32, + format: wgt::TextureFormat, + }, + #[error("Multisampled texture depth or array layers must be 1, got {0}")] + MultisampledDepthOrArrayLayer(u32), +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum CreateTextureError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error(transparent)] + CreateTextureView(#[from] CreateTextureViewError), + 
#[error("Invalid usage flags {0:?}")] + InvalidUsage(wgt::TextureUsages), + #[error(transparent)] + InvalidDimension(#[from] TextureDimensionError), + #[error("Depth texture ({1:?}) can't be created as {0:?}")] + InvalidDepthDimension(wgt::TextureDimension, wgt::TextureFormat), + #[error("Compressed texture ({1:?}) can't be created as {0:?}")] + InvalidCompressedDimension(wgt::TextureDimension, wgt::TextureFormat), + #[error( + "Texture descriptor mip level count {requested} is invalid, maximum allowed is {maximum}" + )] + InvalidMipLevelCount { requested: u32, maximum: u32 }, + #[error( + "Texture usages {0:?} are not allowed on a texture of type {1:?}{}", + if *.2 { " due to downlevel restrictions" } else { "" } + )] + InvalidFormatUsages(wgt::TextureUsages, wgt::TextureFormat, bool), + #[error("The view format {0:?} is not compatible with texture format {1:?}, only changing srgb-ness is allowed.")] + InvalidViewFormat(wgt::TextureFormat, wgt::TextureFormat), + #[error("Texture usages {0:?} are not allowed on a texture of dimensions {1:?}")] + InvalidDimensionUsages(wgt::TextureUsages, wgt::TextureDimension), + #[error("Texture usage STORAGE_BINDING is not allowed for multisampled textures")] + InvalidMultisampledStorageBinding, + #[error("Format {0:?} does not support multisampling")] + InvalidMultisampledFormat(wgt::TextureFormat), + #[error("Sample count {0} is not supported by format {1:?} on this device. The WebGPU spec guarantees {2:?} samples are supported by this format. 
With the TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES feature your device supports {3:?}.")]
+    InvalidSampleCount(u32, wgt::TextureFormat, Vec<u32>, Vec<u32>),
+    #[error("Multisampled textures must have RENDER_ATTACHMENT usage")]
+    MultisampledNotRenderAttachment,
+    #[error("Texture format {0:?} can't be used due to missing features")]
+    MissingFeatures(wgt::TextureFormat, #[source] MissingFeatures),
+    #[error(transparent)]
+    MissingDownlevelFlags(#[from] MissingDownlevelFlags),
+}
+
+impl<A: HalApi> Resource for Texture<A> {
+    const TYPE: ResourceType = "Texture";
+
+    type Marker = crate::id::markers::Texture;
+
+    fn as_info(&self) -> &ResourceInfo<Self> {
+        &self.info
+    }
+
+    fn as_info_mut(&mut self) -> &mut ResourceInfo<Self> {
+        &mut self.info
+    }
+}
+
+impl<A: HalApi> Borrow<TextureSelector> for Texture<A> {
+    fn borrow(&self) -> &TextureSelector {
+        &self.full_range
+    }
+}
+
+/// Describes a [`TextureView`].
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+#[cfg_attr(feature = "serde", serde(default))]
+pub struct TextureViewDescriptor<'a> {
+    /// Debug label of the texture view.
+    ///
+    /// This will show up in graphics debuggers for easy identification.
+    pub label: Label<'a>,
+    /// Format of the texture view, or `None` for the same format as the texture
+    /// itself.
+    ///
+    /// At this time, it must be the same as the underlying format of the texture.
+    pub format: Option<wgt::TextureFormat>,
+    /// The dimension of the texture view.
+    ///
+    /// - For 1D textures, this must be `D1`.
+    /// - For 2D textures it must be one of `D2`, `D2Array`, `Cube`, or `CubeArray`.
+    /// - For 3D textures it must be `D3`.
+    pub dimension: Option<wgt::TextureViewDimension>,
+    /// Range within the texture that is accessible via this view.
+ pub range: wgt::ImageSubresourceRange, +} + +#[derive(Debug)] +pub(crate) struct HalTextureViewDescriptor { + pub texture_format: wgt::TextureFormat, + pub format: wgt::TextureFormat, + pub dimension: wgt::TextureViewDimension, + pub range: wgt::ImageSubresourceRange, +} + +impl HalTextureViewDescriptor { + pub fn aspects(&self) -> hal::FormatAspects { + hal::FormatAspects::new(self.texture_format, self.range.aspect) + } +} + +#[derive(Debug, Copy, Clone, Error)] +pub enum TextureViewNotRenderableReason { + #[error("The texture this view references doesn't include the RENDER_ATTACHMENT usage. Provided usages: {0:?}")] + Usage(wgt::TextureUsages), + #[error("The dimension of this texture view is not 2D. View dimension: {0:?}")] + Dimension(wgt::TextureViewDimension), + #[error("This texture view has more than one mipmap level. View mipmap levels: {0:?}")] + MipLevelCount(u32), + #[error("This texture view has more than one array layer. View array layers: {0:?}")] + ArrayLayerCount(u32), + #[error( + "The aspects of this texture view are a subset of the aspects in the original texture. Aspects: {0:?}" + )] + Aspects(hal::FormatAspects), +} + +#[derive(Debug)] +pub struct TextureView<A: HalApi> { + pub(crate) raw: Snatchable<A::TextureView>, + // if it's a surface texture - it's none + pub(crate) parent: Arc<Texture<A>>, + pub(crate) device: Arc<Device<A>>, + //TODO: store device_id for quick access? 
+ pub(crate) desc: HalTextureViewDescriptor, + pub(crate) format_features: wgt::TextureFormatFeatures, + /// This is `Err` only if the texture view is not renderable + pub(crate) render_extent: Result<wgt::Extent3d, TextureViewNotRenderableReason>, + pub(crate) samples: u32, + pub(crate) selector: TextureSelector, + pub(crate) info: ResourceInfo<TextureView<A>>, +} + +impl<A: HalApi> Drop for TextureView<A> { + fn drop(&mut self) { + if let Some(raw) = self.raw.take() { + resource_log!("Destroy raw TextureView {:?}", self.info.label()); + + #[cfg(feature = "trace")] + if let Some(t) = self.device.trace.lock().as_mut() { + t.add(trace::Action::DestroyTextureView(self.info.id())); + } + + unsafe { + use hal::Device; + self.device.raw().destroy_texture_view(raw); + } + } + } +} + +impl<A: HalApi> TextureView<A> { + pub(crate) fn raw<'a>(&'a self, snatch_guard: &'a SnatchGuard) -> Option<&'a A::TextureView> { + self.raw.get(snatch_guard) + } +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum CreateTextureViewError { + #[error("Parent texture is invalid or destroyed")] + InvalidTexture, + #[error("Not enough memory left to create texture view")] + OutOfMemory, + #[error("Invalid texture view dimension `{view:?}` with texture of dimension `{texture:?}`")] + InvalidTextureViewDimension { + view: wgt::TextureViewDimension, + texture: wgt::TextureDimension, + }, + #[error("Invalid texture view dimension `{0:?}` of a multisampled texture")] + InvalidMultisampledTextureViewDimension(wgt::TextureViewDimension), + #[error("Invalid texture depth `{depth}` for texture view of dimension `Cubemap`. Cubemap views must use images of size 6.")] + InvalidCubemapTextureDepth { depth: u32 }, + #[error("Invalid texture depth `{depth}` for texture view of dimension `CubemapArray`. 
Cubemap views must use images with sizes which are a multiple of 6.")] + InvalidCubemapArrayTextureDepth { depth: u32 }, + #[error("Source texture width and height must be equal for a texture view of dimension `Cube`/`CubeArray`")] + InvalidCubeTextureViewSize, + #[error("Mip level count is 0")] + ZeroMipLevelCount, + #[error("Array layer count is 0")] + ZeroArrayLayerCount, + #[error( + "TextureView mip level count + base mip level {requested} must be <= Texture mip level count {total}" + )] + TooManyMipLevels { requested: u32, total: u32 }, + #[error("TextureView array layer count + base array layer {requested} must be <= Texture depth/array layer count {total}")] + TooManyArrayLayers { requested: u32, total: u32 }, + #[error("Requested array layer count {requested} is not valid for the target view dimension {dim:?}")] + InvalidArrayLayerCount { + requested: u32, + dim: wgt::TextureViewDimension, + }, + #[error("Aspect {requested_aspect:?} is not in the source texture format {texture_format:?}")] + InvalidAspect { + texture_format: wgt::TextureFormat, + requested_aspect: wgt::TextureAspect, + }, + #[error("Unable to view texture {texture:?} as {view:?}")] + FormatReinterpretation { + texture: wgt::TextureFormat, + view: wgt::TextureFormat, + }, +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum TextureViewDestroyError {} + +impl<A: HalApi> Resource for TextureView<A> { + const TYPE: ResourceType = "TextureView"; + + type Marker = crate::id::markers::TextureView; + + fn as_info(&self) -> &ResourceInfo<Self> { + &self.info + } + + fn as_info_mut(&mut self) -> &mut ResourceInfo<Self> { + &mut self.info + } +} + +/// Describes a [`Sampler`] +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct SamplerDescriptor<'a> { + /// Debug label of the sampler. + /// + /// This will show up in graphics debuggers for easy identification. 
+ pub label: Label<'a>, + /// How to deal with out of bounds accesses in the u (i.e. x) direction + pub address_modes: [wgt::AddressMode; 3], + /// How to filter the texture when it needs to be magnified (made larger) + pub mag_filter: wgt::FilterMode, + /// How to filter the texture when it needs to be minified (made smaller) + pub min_filter: wgt::FilterMode, + /// How to filter between mip map levels + pub mipmap_filter: wgt::FilterMode, + /// Minimum level of detail (i.e. mip level) to use + pub lod_min_clamp: f32, + /// Maximum level of detail (i.e. mip level) to use + pub lod_max_clamp: f32, + /// If this is enabled, this is a comparison sampler using the given comparison function. + pub compare: Option<wgt::CompareFunction>, + /// Must be at least 1. If this is not 1, all filter modes must be linear. + pub anisotropy_clamp: u16, + /// Border color to use when address_mode is + /// [`AddressMode::ClampToBorder`](wgt::AddressMode::ClampToBorder) + pub border_color: Option<wgt::SamplerBorderColor>, +} + +#[derive(Debug)] +pub struct Sampler<A: HalApi> { + pub(crate) raw: Option<A::Sampler>, + pub(crate) device: Arc<Device<A>>, + pub(crate) info: ResourceInfo<Self>, + /// `true` if this is a comparison sampler + pub(crate) comparison: bool, + /// `true` if this is a filtering sampler + pub(crate) filtering: bool, +} + +impl<A: HalApi> Drop for Sampler<A> { + fn drop(&mut self) { + resource_log!("Destroy raw Sampler {:?}", self.info.label()); + if let Some(raw) = self.raw.take() { + #[cfg(feature = "trace")] + if let Some(t) = self.device.trace.lock().as_mut() { + t.add(trace::Action::DestroySampler(self.info.id())); + } + + unsafe { + use hal::Device; + self.device.raw().destroy_sampler(raw); + } + } + } +} + +impl<A: HalApi> Sampler<A> { + pub(crate) fn raw(&self) -> &A::Sampler { + self.raw.as_ref().unwrap() + } +} + +#[derive(Copy, Clone)] +pub enum SamplerFilterErrorType { + MagFilter, + MinFilter, + MipmapFilter, +} + +impl Debug for SamplerFilterErrorType 
{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match *self { + SamplerFilterErrorType::MagFilter => write!(f, "magFilter"), + SamplerFilterErrorType::MinFilter => write!(f, "minFilter"), + SamplerFilterErrorType::MipmapFilter => write!(f, "mipmapFilter"), + } + } +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum CreateSamplerError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("Invalid lodMinClamp: {0}. Must be greater or equal to 0.0")] + InvalidLodMinClamp(f32), + #[error("Invalid lodMaxClamp: {lod_max_clamp}. Must be greater or equal to lodMinClamp (which is {lod_min_clamp}).")] + InvalidLodMaxClamp { + lod_min_clamp: f32, + lod_max_clamp: f32, + }, + #[error("Invalid anisotropic clamp: {0}. Must be at least 1.")] + InvalidAnisotropy(u16), + #[error("Invalid filter mode for {filter_type:?}: {filter_mode:?}. When anistropic clamp is not 1 (it is {anisotropic_clamp}), all filter modes must be linear.")] + InvalidFilterModeWithAnisotropy { + filter_type: SamplerFilterErrorType, + filter_mode: wgt::FilterMode, + anisotropic_clamp: u16, + }, + #[error("Cannot create any more samplers")] + TooManyObjects, + /// AddressMode::ClampToBorder requires feature ADDRESS_MODE_CLAMP_TO_BORDER. + #[error(transparent)] + MissingFeatures(#[from] MissingFeatures), +} + +impl<A: HalApi> Resource for Sampler<A> { + const TYPE: ResourceType = "Sampler"; + + type Marker = crate::id::markers::Sampler; + + fn as_info(&self) -> &ResourceInfo<Self> { + &self.info + } + + fn as_info_mut(&mut self) -> &mut ResourceInfo<Self> { + &mut self.info + } +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum CreateQuerySetError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("QuerySets cannot be made with zero queries")] + ZeroCount, + #[error("{count} is too many queries for a single QuerySet. 
QuerySets cannot be made more than {maximum} queries.")] + TooManyQueries { count: u32, maximum: u32 }, + #[error(transparent)] + MissingFeatures(#[from] MissingFeatures), +} + +pub type QuerySetDescriptor<'a> = wgt::QuerySetDescriptor<Label<'a>>; + +#[derive(Debug)] +pub struct QuerySet<A: HalApi> { + pub(crate) raw: Option<A::QuerySet>, + pub(crate) device: Arc<Device<A>>, + pub(crate) info: ResourceInfo<Self>, + pub(crate) desc: wgt::QuerySetDescriptor<()>, +} + +impl<A: HalApi> Drop for QuerySet<A> { + fn drop(&mut self) { + resource_log!("Destroy raw QuerySet {:?}", self.info.label()); + if let Some(raw) = self.raw.take() { + #[cfg(feature = "trace")] + if let Some(t) = self.device.trace.lock().as_mut() { + t.add(trace::Action::DestroyQuerySet(self.info.id())); + } + + unsafe { + use hal::Device; + self.device.raw().destroy_query_set(raw); + } + } + } +} + +impl<A: HalApi> Resource for QuerySet<A> { + const TYPE: ResourceType = "QuerySet"; + + type Marker = crate::id::markers::QuerySet; + + fn as_info(&self) -> &ResourceInfo<Self> { + &self.info + } + + fn as_info_mut(&mut self) -> &mut ResourceInfo<Self> { + &mut self.info + } +} + +impl<A: HalApi> QuerySet<A> { + pub(crate) fn raw(&self) -> &A::QuerySet { + self.raw.as_ref().unwrap() + } +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum DestroyError { + #[error("Resource is invalid")] + Invalid, + #[error("Resource is already destroyed")] + AlreadyDestroyed, +} diff --git a/third_party/rust/wgpu-core/src/snatch.rs b/third_party/rust/wgpu-core/src/snatch.rs new file mode 100644 index 0000000000..2324d33574 --- /dev/null +++ b/third_party/rust/wgpu-core/src/snatch.rs @@ -0,0 +1,91 @@ +#![allow(unused)] + +use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard}; +use std::cell::UnsafeCell; + +/// A guard that provides read access to snatchable data. +pub struct SnatchGuard<'a>(RwLockReadGuard<'a, ()>); +/// A guard that allows snatching the snatchable data. 
+pub struct ExclusiveSnatchGuard<'a>(RwLockWriteGuard<'a, ()>);
+
+/// A value that is mostly immutable but can be "snatched" if we need to destroy
+/// it early.
+///
+/// In order to safely access the underlying data, the device's global snatchable
+/// lock must be taken. To guarantee it, methods take a read or write guard of that
+/// special lock.
+pub struct Snatchable<T> {
+    value: UnsafeCell<Option<T>>,
+}
+
+impl<T> Snatchable<T> {
+    pub fn new(val: T) -> Self {
+        Snatchable {
+            value: UnsafeCell::new(Some(val)),
+        }
+    }
+
+    /// Get read access to the value. Requires the snatchable lock's read guard.
+    pub fn get(&self, _guard: &SnatchGuard) -> Option<&T> {
+        unsafe { (*self.value.get()).as_ref() }
+    }
+
+    /// Get write access to the value. Requires the snatchable lock's write guard.
+    pub fn get_mut(&self, _guard: &mut ExclusiveSnatchGuard) -> Option<&mut T> {
+        unsafe { (*self.value.get()).as_mut() }
+    }
+
+    /// Take the value. Requires the snatchable lock's write guard.
+    pub fn snatch(&self, _guard: ExclusiveSnatchGuard) -> Option<T> {
+        unsafe { (*self.value.get()).take() }
+    }
+
+    /// Take the value without a guard. This can only be used with exclusive access
+    /// to self, so it does not require locking.
+    ///
+    /// Typically useful in a drop implementation.
+    pub fn take(&mut self) -> Option<T> {
+        self.value.get_mut().take()
+    }
+}
+
+// Can't safely print the contents of a snatchable object without holding
+// the lock.
+impl<T> std::fmt::Debug for Snatchable<T> {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "<snatchable>")
+    }
+}
+
+unsafe impl<T> Sync for Snatchable<T> {}
+
+/// A Device-global lock for all snatchable data.
+pub struct SnatchLock {
+    lock: RwLock<()>,
+}
+
+impl SnatchLock {
+    /// The safety of `Snatchable::get` and `Snatchable::snatch` relies on using the
+    /// right SnatchLock (the one associated to the same device).
 This method is unsafe
+    /// to force users to think twice about creating a SnatchLock. The only place this
+    /// method should be called is when creating the device.
+    pub unsafe fn new() -> Self {
+        SnatchLock {
+            lock: RwLock::new(()),
+        }
+    }
+
+    /// Request read access to snatchable resources.
+    pub fn read(&self) -> SnatchGuard {
+        SnatchGuard(self.lock.read())
+    }
+
+    /// Request write access to snatchable resources.
+    ///
+    /// This should only be called when a resource needs to be snatched. This has
+    /// a high risk of causing lock contention if called concurrently with other
+    /// wgpu work.
+    pub fn write(&self) -> ExclusiveSnatchGuard {
+        ExclusiveSnatchGuard(self.lock.write())
+    }
+}
diff --git a/third_party/rust/wgpu-core/src/storage.rs b/third_party/rust/wgpu-core/src/storage.rs
new file mode 100644
index 0000000000..554ced8b7d
--- /dev/null
+++ b/third_party/rust/wgpu-core/src/storage.rs
@@ -0,0 +1,230 @@
+use std::ops;
+use std::sync::Arc;
+
+use wgt::Backend;
+
+use crate::id::Id;
+use crate::resource::Resource;
+use crate::{Epoch, Index};
+
+/// An entry in a `Storage::map` table.
+#[derive(Debug)]
+pub(crate) enum Element<T> {
+    /// There are no live ids with this index.
+    Vacant,
+
+    /// There is one live id with this index, allocated at the given
+    /// epoch.
+    Occupied(Arc<T>, Epoch),
+
+    /// Like `Occupied`, but an error occurred when creating the
+    /// resource.
+    ///
+    /// The given `String` is the resource's descriptor label.
+    Error(Epoch, String),
+}
+
+#[derive(Clone, Debug)]
+pub(crate) struct InvalidId;
+
+/// A table of `T` values indexed by the id type `I`.
+///
+/// The table is represented as a vector indexed by the ids' index
+/// values, so you should use an id allocator like `IdentityManager`
+/// that keeps the index values dense and close to zero.
+#[derive(Debug)] +pub struct Storage<T> +where + T: Resource, +{ + pub(crate) map: Vec<Element<T>>, + kind: &'static str, +} + +impl<T> ops::Index<Id<T::Marker>> for Storage<T> +where + T: Resource, +{ + type Output = Arc<T>; + fn index(&self, id: Id<T::Marker>) -> &Arc<T> { + self.get(id).unwrap() + } +} +impl<T> Storage<T> +where + T: Resource, +{ + pub(crate) fn new() -> Self { + Self { + map: Vec::new(), + kind: T::TYPE, + } + } +} + +impl<T> Storage<T> +where + T: Resource, +{ + #[allow(dead_code)] + pub(crate) fn contains(&self, id: Id<T::Marker>) -> bool { + let (index, epoch, _) = id.unzip(); + match self.map.get(index as usize) { + Some(&Element::Vacant) => false, + Some(&Element::Occupied(_, storage_epoch) | &Element::Error(storage_epoch, _)) => { + storage_epoch == epoch + } + None => false, + } + } + + /// Attempts to get a reference to an item behind a potentially invalid ID. + /// + /// Returns [`None`] if there is an epoch mismatch, or the entry is empty. + /// + /// This function is primarily intended for the `as_hal` family of functions + /// where you may need to fallibly get a object backed by an id that could + /// be in a different hub. + pub(crate) fn try_get(&self, id: Id<T::Marker>) -> Result<Option<&Arc<T>>, InvalidId> { + let (index, epoch, _) = id.unzip(); + let (result, storage_epoch) = match self.map.get(index as usize) { + Some(&Element::Occupied(ref v, epoch)) => (Ok(Some(v)), epoch), + Some(&Element::Vacant) => return Ok(None), + Some(&Element::Error(epoch, ..)) => (Err(InvalidId), epoch), + None => return Err(InvalidId), + }; + assert_eq!( + epoch, storage_epoch, + "{}[{:?}] is no longer alive", + self.kind, id + ); + result + } + + /// Get a reference to an item behind a potentially invalid ID. + /// Panics if there is an epoch mismatch, or the entry is empty. 
+ pub(crate) fn get(&self, id: Id<T::Marker>) -> Result<&Arc<T>, InvalidId> { + let (index, epoch, _) = id.unzip(); + let (result, storage_epoch) = match self.map.get(index as usize) { + Some(&Element::Occupied(ref v, epoch)) => (Ok(v), epoch), + Some(&Element::Vacant) => panic!("{}[{:?}] does not exist", self.kind, id), + Some(&Element::Error(epoch, ..)) => (Err(InvalidId), epoch), + None => return Err(InvalidId), + }; + assert_eq!( + epoch, storage_epoch, + "{}[{:?}] is no longer alive", + self.kind, id + ); + result + } + + /// Get an owned reference to an item behind a potentially invalid ID. + /// Panics if there is an epoch mismatch, or the entry is empty. + pub(crate) fn get_owned(&self, id: Id<T::Marker>) -> Result<Arc<T>, InvalidId> { + Ok(Arc::clone(self.get(id)?)) + } + + pub(crate) fn label_for_invalid_id(&self, id: Id<T::Marker>) -> &str { + let (index, _, _) = id.unzip(); + match self.map.get(index as usize) { + Some(Element::Error(_, label)) => label, + _ => "", + } + } + + fn insert_impl(&mut self, index: usize, epoch: Epoch, element: Element<T>) { + if index >= self.map.len() { + self.map.resize_with(index + 1, || Element::Vacant); + } + match std::mem::replace(&mut self.map[index], element) { + Element::Vacant => {} + Element::Occupied(_, storage_epoch) => { + assert_ne!( + epoch, + storage_epoch, + "Index {index:?} of {} is already occupied", + T::TYPE + ); + } + Element::Error(storage_epoch, _) => { + assert_ne!( + epoch, + storage_epoch, + "Index {index:?} of {} is already occupied with Error", + T::TYPE + ); + } + } + } + + pub(crate) fn insert(&mut self, id: Id<T::Marker>, value: Arc<T>) { + log::trace!("User is inserting {}{:?}", T::TYPE, id); + let (index, epoch, _backend) = id.unzip(); + self.insert_impl(index as usize, epoch, Element::Occupied(value, epoch)) + } + + pub(crate) fn insert_error(&mut self, id: Id<T::Marker>, label: &str) { + log::trace!("User is inserting as error {}{:?}", T::TYPE, id); + let (index, epoch, _) = id.unzip(); 
+ self.insert_impl( + index as usize, + epoch, + Element::Error(epoch, label.to_string()), + ) + } + + pub(crate) fn replace_with_error(&mut self, id: Id<T::Marker>) -> Result<Arc<T>, InvalidId> { + let (index, epoch, _) = id.unzip(); + match std::mem::replace( + &mut self.map[index as usize], + Element::Error(epoch, String::new()), + ) { + Element::Vacant => panic!("Cannot access vacant resource"), + Element::Occupied(value, storage_epoch) => { + assert_eq!(epoch, storage_epoch); + Ok(value) + } + _ => Err(InvalidId), + } + } + + pub(crate) fn force_replace(&mut self, id: Id<T::Marker>, value: T) { + log::trace!("User is replacing {}{:?}", T::TYPE, id); + let (index, epoch, _) = id.unzip(); + self.map[index as usize] = Element::Occupied(Arc::new(value), epoch); + } + + pub(crate) fn remove(&mut self, id: Id<T::Marker>) -> Option<Arc<T>> { + log::trace!("User is removing {}{:?}", T::TYPE, id); + let (index, epoch, _) = id.unzip(); + match std::mem::replace(&mut self.map[index as usize], Element::Vacant) { + Element::Occupied(value, storage_epoch) => { + assert_eq!(epoch, storage_epoch); + Some(value) + } + Element::Error(..) => None, + Element::Vacant => panic!("Cannot remove a vacant resource"), + } + } + + pub(crate) fn iter(&self, backend: Backend) -> impl Iterator<Item = (Id<T::Marker>, &Arc<T>)> { + self.map + .iter() + .enumerate() + .filter_map(move |(index, x)| match *x { + Element::Occupied(ref value, storage_epoch) => { + Some((Id::zip(index as Index, storage_epoch, backend), value)) + } + _ => None, + }) + } + + pub(crate) fn kind(&self) -> &str { + self.kind + } + + pub(crate) fn len(&self) -> usize { + self.map.len() + } +} diff --git a/third_party/rust/wgpu-core/src/track/buffer.rs b/third_party/rust/wgpu-core/src/track/buffer.rs new file mode 100644 index 0000000000..323d2dab9d --- /dev/null +++ b/third_party/rust/wgpu-core/src/track/buffer.rs @@ -0,0 +1,838 @@ +/*! 
Buffer Trackers + * + * Buffers are represented by a single state for the whole resource, + * a 16 bit bitflag of buffer usages. Because there is only ever + * one subresource, they have no selector. +!*/ + +use std::{borrow::Cow, marker::PhantomData, sync::Arc}; + +use super::{PendingTransition, ResourceTracker}; +use crate::{ + hal_api::HalApi, + id::BufferId, + resource::{Buffer, Resource}, + snatch::SnatchGuard, + storage::Storage, + track::{ + invalid_resource_state, skip_barrier, ResourceMetadata, ResourceMetadataProvider, + ResourceUses, UsageConflict, + }, +}; +use hal::{BufferBarrier, BufferUses}; +use parking_lot::Mutex; +use wgt::{strict_assert, strict_assert_eq}; + +impl ResourceUses for BufferUses { + const EXCLUSIVE: Self = Self::EXCLUSIVE; + + type Selector = (); + + fn bits(self) -> u16 { + Self::bits(&self) + } + + fn all_ordered(self) -> bool { + Self::ORDERED.contains(self) + } + + fn any_exclusive(self) -> bool { + self.intersects(Self::EXCLUSIVE) + } +} + +/// Stores all the buffers that a bind group stores. +#[derive(Debug)] +pub(crate) struct BufferBindGroupState<A: HalApi> { + buffers: Mutex<Vec<(Arc<Buffer<A>>, BufferUses)>>, + + _phantom: PhantomData<A>, +} +impl<A: HalApi> BufferBindGroupState<A> { + pub fn new() -> Self { + Self { + buffers: Mutex::new(Vec::new()), + + _phantom: PhantomData, + } + } + + /// Optimize the buffer bind group state by sorting it by ID. + /// + /// When this list of states is merged into a tracker, the memory + /// accesses will be in a constant ascending order. + #[allow(clippy::pattern_type_mismatch)] + pub(crate) fn optimize(&self) { + let mut buffers = self.buffers.lock(); + buffers.sort_unstable_by_key(|(b, _)| b.as_info().id().unzip().0); + } + + /// Returns a list of all buffers tracked. May contain duplicates. 
+ #[allow(clippy::pattern_type_mismatch)] + pub fn used_ids(&self) -> impl Iterator<Item = BufferId> + '_ { + let buffers = self.buffers.lock(); + buffers + .iter() + .map(|(ref b, _)| b.as_info().id()) + .collect::<Vec<_>>() + .into_iter() + } + + /// Returns a list of all buffers tracked. May contain duplicates. + pub fn drain_resources(&self) -> impl Iterator<Item = Arc<Buffer<A>>> + '_ { + let mut buffers = self.buffers.lock(); + buffers + .drain(..) + .map(|(buffer, _u)| buffer) + .collect::<Vec<_>>() + .into_iter() + } + + /// Adds the given resource with the given state. + pub fn add_single<'a>( + &self, + storage: &'a Storage<Buffer<A>>, + id: BufferId, + state: BufferUses, + ) -> Option<&'a Arc<Buffer<A>>> { + let buffer = storage.get(id).ok()?; + + let mut buffers = self.buffers.lock(); + buffers.push((buffer.clone(), state)); + + Some(buffer) + } +} + +/// Stores all buffer state within a single usage scope. +#[derive(Debug)] +pub(crate) struct BufferUsageScope<A: HalApi> { + state: Vec<BufferUses>, + + metadata: ResourceMetadata<Buffer<A>>, +} + +impl<A: HalApi> BufferUsageScope<A> { + pub fn new() -> Self { + Self { + state: Vec::new(), + + metadata: ResourceMetadata::new(), + } + } + + fn tracker_assert_in_bounds(&self, index: usize) { + strict_assert!(index < self.state.len()); + self.metadata.tracker_assert_in_bounds(index); + } + + /// Sets the size of all the vectors inside the tracker. + /// + /// Must be called with the highest possible Buffer ID before + /// all unsafe functions are called. + pub fn set_size(&mut self, size: usize) { + self.state.resize(size, BufferUses::empty()); + self.metadata.set_size(size); + } + + /// Extend the vectors to let the given index be valid. + fn allow_index(&mut self, index: usize) { + if index >= self.state.len() { + self.set_size(index + 1); + } + } + + /// Drains all buffers tracked. 
+ pub fn drain_resources(&mut self) -> impl Iterator<Item = Arc<Buffer<A>>> + '_ { + let resources = self.metadata.drain_resources(); + self.state.clear(); + resources.into_iter() + } + + pub fn get(&self, id: BufferId) -> Option<&Arc<Buffer<A>>> { + let index = id.unzip().0 as usize; + if index > self.metadata.size() { + return None; + } + self.tracker_assert_in_bounds(index); + unsafe { + if self.metadata.contains_unchecked(index) { + return Some(self.metadata.get_resource_unchecked(index)); + } + } + None + } + + /// Merge the list of buffer states in the given bind group into this usage scope. + /// + /// If any of the resulting states is invalid, stops the merge and returns a usage + /// conflict with the details of the invalid state. + /// + /// Because bind groups do not check if the union of all their states is valid, + /// this method is allowed to return Err on the first bind group bound. + /// + /// # Safety + /// + /// [`Self::set_size`] must be called with the maximum possible Buffer ID before this + /// method is called. + pub unsafe fn merge_bind_group( + &mut self, + bind_group: &BufferBindGroupState<A>, + ) -> Result<(), UsageConflict> { + let buffers = bind_group.buffers.lock(); + for &(ref resource, state) in &*buffers { + let index = resource.as_info().id().unzip().0 as usize; + + unsafe { + insert_or_merge( + None, + &mut self.state, + &mut self.metadata, + index as _, + index, + BufferStateProvider::Direct { state }, + ResourceMetadataProvider::Direct { + resource: Cow::Borrowed(resource), + }, + )? + }; + } + + Ok(()) + } + + /// Merge the list of buffer states in the given usage scope into this UsageScope. + /// + /// If any of the resulting states is invalid, stops the merge and returns a usage + /// conflict with the details of the invalid state. + /// + /// If the given tracker uses IDs higher than the length of internal vectors, + /// the vectors will be extended. A call to set_size is not needed. 
+ pub fn merge_usage_scope(&mut self, scope: &Self) -> Result<(), UsageConflict> { + let incoming_size = scope.state.len(); + if incoming_size > self.state.len() { + self.set_size(incoming_size); + } + + for index in scope.metadata.owned_indices() { + self.tracker_assert_in_bounds(index); + scope.tracker_assert_in_bounds(index); + + unsafe { + insert_or_merge( + None, + &mut self.state, + &mut self.metadata, + index as u32, + index, + BufferStateProvider::Indirect { + state: &scope.state, + }, + ResourceMetadataProvider::Indirect { + metadata: &scope.metadata, + }, + )?; + }; + } + + Ok(()) + } + + /// Merge a single state into the UsageScope. + /// + /// If the resulting state is invalid, returns a usage + /// conflict with the details of the invalid state. + /// + /// If the ID is higher than the length of internal vectors, + /// the vectors will be extended. A call to set_size is not needed. + pub fn merge_single<'a>( + &mut self, + storage: &'a Storage<Buffer<A>>, + id: BufferId, + new_state: BufferUses, + ) -> Result<&'a Arc<Buffer<A>>, UsageConflict> { + let buffer = storage + .get(id) + .map_err(|_| UsageConflict::BufferInvalid { id })?; + + let index = id.unzip().0 as usize; + + self.allow_index(index); + + self.tracker_assert_in_bounds(index); + + unsafe { + insert_or_merge( + None, + &mut self.state, + &mut self.metadata, + index as _, + index, + BufferStateProvider::Direct { state: new_state }, + ResourceMetadataProvider::Direct { + resource: Cow::Owned(buffer.clone()), + }, + )?; + } + + Ok(buffer) + } +} + +pub(crate) type SetSingleResult<A> = + Option<(Arc<Buffer<A>>, Option<PendingTransition<BufferUses>>)>; + +/// Stores all buffer state within a command buffer or device. 
+pub(crate) struct BufferTracker<A: HalApi> { + start: Vec<BufferUses>, + end: Vec<BufferUses>, + + metadata: ResourceMetadata<Buffer<A>>, + + temp: Vec<PendingTransition<BufferUses>>, +} + +impl<A: HalApi> ResourceTracker<Buffer<A>> for BufferTracker<A> { + /// Try to remove the buffer `id` from this tracker if it is otherwise unused. + /// + /// A buffer is 'otherwise unused' when the only references to it are: + /// + /// 1) the `Arc` that our caller, `LifetimeTracker::triage_suspected`, has just + /// drained from `LifetimeTracker::suspected_resources`, + /// + /// 2) its `Arc` in [`self.metadata`] (owned by [`Device::trackers`]), and + /// + /// 3) its `Arc` in the [`Hub::buffers`] registry. + /// + /// If the buffer is indeed unused, this function removes 2), and + /// `triage_suspected` will remove 3), leaving 1) as the sole + /// remaining reference. + /// + /// Returns true if the resource was removed or if not existing in metadata. + /// + /// [`Device::trackers`]: crate::device::Device + /// [`self.metadata`]: BufferTracker::metadata + /// [`Hub::buffers`]: crate::hub::Hub::buffers + fn remove_abandoned(&mut self, id: BufferId) -> bool { + let index = id.unzip().0 as usize; + + if index > self.metadata.size() { + return false; + } + + self.tracker_assert_in_bounds(index); + + unsafe { + if self.metadata.contains_unchecked(index) { + let existing_ref_count = self.metadata.get_ref_count_unchecked(index); + //RefCount 2 means that resource is hold just by DeviceTracker and this suspected resource itself + //so it's already been released from user and so it's not inside Registry\Storage + if existing_ref_count <= 2 { + self.metadata.remove(index); + log::trace!("Buffer {:?} is not tracked anymore", id,); + return true; + } else { + log::trace!( + "Buffer {:?} is still referenced from {}", + id, + existing_ref_count + ); + return false; + } + } + } + true + } +} + +impl<A: HalApi> BufferTracker<A> { + pub fn new() -> Self { + Self { + start: Vec::new(), + 
end: Vec::new(), + + metadata: ResourceMetadata::new(), + + temp: Vec::new(), + } + } + + fn tracker_assert_in_bounds(&self, index: usize) { + strict_assert!(index < self.start.len()); + strict_assert!(index < self.end.len()); + self.metadata.tracker_assert_in_bounds(index); + } + + /// Sets the size of all the vectors inside the tracker. + /// + /// Must be called with the highest possible Buffer ID before + /// all unsafe functions are called. + pub fn set_size(&mut self, size: usize) { + self.start.resize(size, BufferUses::empty()); + self.end.resize(size, BufferUses::empty()); + + self.metadata.set_size(size); + } + + /// Extend the vectors to let the given index be valid. + fn allow_index(&mut self, index: usize) { + if index >= self.start.len() { + self.set_size(index + 1); + } + } + + /// Returns a list of all buffers tracked. + pub fn used_resources(&self) -> impl Iterator<Item = Arc<Buffer<A>>> + '_ { + self.metadata.owned_resources() + } + + /// Drains all currently pending transitions. + pub fn drain_transitions<'a, 'b: 'a>( + &'b mut self, + snatch_guard: &'a SnatchGuard<'a>, + ) -> impl Iterator<Item = BufferBarrier<'a, A>> { + let buffer_barriers = self.temp.drain(..).map(|pending| { + let buf = unsafe { self.metadata.get_resource_unchecked(pending.id as _) }; + pending.into_hal(buf, snatch_guard) + }); + buffer_barriers + } + + /// Inserts a single buffer and its state into the resource tracker. + /// + /// If the resource already exists in the tracker, this will panic. + /// + /// If the ID is higher than the length of internal vectors, + /// the vectors will be extended. A call to set_size is not needed. 
+ pub fn insert_single(&mut self, id: BufferId, resource: Arc<Buffer<A>>, state: BufferUses) { + let index = id.unzip().0 as usize; + + self.allow_index(index); + + self.tracker_assert_in_bounds(index); + + unsafe { + let currently_owned = self.metadata.contains_unchecked(index); + + if currently_owned { + panic!("Tried to insert buffer already tracked"); + } + + insert( + Some(&mut self.start), + &mut self.end, + &mut self.metadata, + index, + BufferStateProvider::Direct { state }, + None, + ResourceMetadataProvider::Direct { + resource: Cow::Owned(resource), + }, + ) + } + } + + /// Sets the state of a single buffer. + /// + /// If a transition is needed to get the buffer into the given state, that transition + /// is returned. No more than one transition is needed. + /// + /// If the ID is higher than the length of internal vectors, + /// the vectors will be extended. A call to set_size is not needed. + pub fn set_single(&mut self, buffer: &Arc<Buffer<A>>, state: BufferUses) -> SetSingleResult<A> { + let index: usize = buffer.as_info().id().unzip().0 as usize; + + self.allow_index(index); + + self.tracker_assert_in_bounds(index); + + unsafe { + insert_or_barrier_update( + Some(&mut self.start), + &mut self.end, + &mut self.metadata, + index, + BufferStateProvider::Direct { state }, + None, + ResourceMetadataProvider::Direct { + resource: Cow::Owned(buffer.clone()), + }, + &mut self.temp, + ) + }; + + strict_assert!(self.temp.len() <= 1); + + Some((buffer.clone(), self.temp.pop())) + } + + /// Sets the given state for all buffers in the given tracker. + /// + /// If a transition is needed to get the buffers into the needed state, + /// those transitions are stored within the tracker. A subsequent + /// call to [`Self::drain_transitions`] is needed to get those transitions. + /// + /// If the ID is higher than the length of internal vectors, + /// the vectors will be extended. A call to set_size is not needed. 
+ pub fn set_from_tracker(&mut self, tracker: &Self) { + let incoming_size = tracker.start.len(); + if incoming_size > self.start.len() { + self.set_size(incoming_size); + } + + for index in tracker.metadata.owned_indices() { + self.tracker_assert_in_bounds(index); + tracker.tracker_assert_in_bounds(index); + unsafe { + insert_or_barrier_update( + Some(&mut self.start), + &mut self.end, + &mut self.metadata, + index, + BufferStateProvider::Indirect { + state: &tracker.start, + }, + Some(BufferStateProvider::Indirect { + state: &tracker.end, + }), + ResourceMetadataProvider::Indirect { + metadata: &tracker.metadata, + }, + &mut self.temp, + ) + } + } + } + + /// Sets the given state for all buffers in the given UsageScope. + /// + /// If a transition is needed to get the buffers into the needed state, + /// those transitions are stored within the tracker. A subsequent + /// call to [`Self::drain_transitions`] is needed to get those transitions. + /// + /// If the ID is higher than the length of internal vectors, + /// the vectors will be extended. A call to set_size is not needed. + pub fn set_from_usage_scope(&mut self, scope: &BufferUsageScope<A>) { + let incoming_size = scope.state.len(); + if incoming_size > self.start.len() { + self.set_size(incoming_size); + } + + for index in scope.metadata.owned_indices() { + self.tracker_assert_in_bounds(index); + scope.tracker_assert_in_bounds(index); + unsafe { + insert_or_barrier_update( + Some(&mut self.start), + &mut self.end, + &mut self.metadata, + index, + BufferStateProvider::Indirect { + state: &scope.state, + }, + None, + ResourceMetadataProvider::Indirect { + metadata: &scope.metadata, + }, + &mut self.temp, + ) + } + } + } + + /// Iterates through all buffers in the given bind group and adopts + /// the state given for those buffers in the UsageScope. It also + /// removes all touched buffers from the usage scope. 
+    ///
+    /// If a transition is needed to get the buffers into the needed state,
+    /// those transitions are stored within the tracker. A subsequent
+    /// call to [`Self::drain_transitions`] is needed to get those transitions.
+    ///
+    /// This is a really funky method used by Compute Passes to generate
+    /// barriers after a call to dispatch without needing to iterate
+    /// over all elements in the usage scope. We use the given
+    /// iterator of ids as the source of which IDs to look at.
+    /// All the IDs must have first been added to the usage scope.
+    ///
+    /// # Safety
+    ///
+    /// [`Self::set_size`] must be called with the maximum possible Buffer ID before this
+    /// method is called.
+    pub unsafe fn set_and_remove_from_usage_scope_sparse(
+        &mut self,
+        scope: &mut BufferUsageScope<A>,
+        id_source: impl IntoIterator<Item = BufferId>,
+    ) {
+        let incoming_size = scope.state.len();
+        if incoming_size > self.start.len() {
+            self.set_size(incoming_size);
+        }
+
+        for id in id_source {
+            let (index32, _, _) = id.unzip();
+            let index = index32 as usize;
+
+            scope.tracker_assert_in_bounds(index);
+
+            // Skip ids the usage scope never tracked; only touched buffers matter.
+            if unsafe { !scope.metadata.contains_unchecked(index) } {
+                continue;
+            }
+            unsafe {
+                insert_or_barrier_update(
+                    Some(&mut self.start),
+                    &mut self.end,
+                    &mut self.metadata,
+                    index,
+                    BufferStateProvider::Indirect {
+                        state: &scope.state,
+                    },
+                    None,
+                    ResourceMetadataProvider::Indirect {
+                        metadata: &scope.metadata,
+                    },
+                    &mut self.temp,
+                )
+            };
+
+            // Adopted into this tracker, so remove it from the usage scope.
+            unsafe { scope.metadata.remove(index) };
+        }
+    }
+
+    #[allow(dead_code)]
+    pub fn get(&self, id: BufferId) -> Option<&Arc<Buffer<A>>> {
+        let index = id.unzip().0 as usize;
+        if index > self.metadata.size() {
+            return None;
+        }
+        self.tracker_assert_in_bounds(index);
+        unsafe {
+            if self.metadata.contains_unchecked(index) {
+                return Some(self.metadata.get_resource_unchecked(index));
+            }
+        }
+        None
+    }
+}
+
+/// Source of Buffer State.
+#[derive(Debug, Clone)]
+enum BufferStateProvider<'a> {
+    /// Get a state that was provided directly.
+    Direct { state: BufferUses },
+    /// Get a state from an array of states.
+    Indirect { state: &'a [BufferUses] },
+}
+impl BufferStateProvider<'_> {
+    /// Gets the state from the provider, given a resource ID index.
+    ///
+    /// # Safety
+    ///
+    /// Index must be in bounds for the indirect source iff this is in the indirect state.
+    #[inline(always)]
+    unsafe fn get_state(&self, index: usize) -> BufferUses {
+        match *self {
+            BufferStateProvider::Direct { state } => state,
+            BufferStateProvider::Indirect { state } => {
+                strict_assert!(index < state.len());
+                *unsafe { state.get_unchecked(index) }
+            }
+        }
+    }
+}
+
+/// Does an insertion operation if the index isn't tracked
+/// in the current metadata, otherwise merges the given state
+/// with the current state. If the merging would cause
+/// a conflict, returns that usage conflict.
+///
+/// # Safety
+///
+/// Indexes must be valid indexes into all arrays passed in
+/// to this function, either directly or via metadata or provider structs.
+#[inline(always)]
+unsafe fn insert_or_merge<A: HalApi>(
+    start_states: Option<&mut [BufferUses]>,
+    current_states: &mut [BufferUses],
+    resource_metadata: &mut ResourceMetadata<Buffer<A>>,
+    index32: u32,
+    index: usize,
+    state_provider: BufferStateProvider<'_>,
+    metadata_provider: ResourceMetadataProvider<'_, Buffer<A>>,
+) -> Result<(), UsageConflict> {
+    let currently_owned = unsafe { resource_metadata.contains_unchecked(index) };
+
+    if !currently_owned {
+        unsafe {
+            insert(
+                start_states,
+                current_states,
+                resource_metadata,
+                index,
+                state_provider,
+                None,
+                metadata_provider,
+            )
+        };
+        return Ok(());
+    }
+
+    unsafe {
+        merge(
+            current_states,
+            index32,
+            index,
+            state_provider,
+            metadata_provider,
+        )
+    }
+}
+
+/// If the resource isn't tracked
+/// - Inserts the given resource.
+/// - Uses the `start_state_provider` to populate `start_states` +/// - Uses either `end_state_provider` or `start_state_provider` +/// to populate `current_states`. +/// If the resource is tracked +/// - Inserts barriers from the state in `current_states` +/// to the state provided by `start_state_provider`. +/// - Updates the `current_states` with either the state from +/// `end_state_provider` or `start_state_provider`. +/// +/// Any barriers are added to the barrier vector. +/// +/// # Safety +/// +/// Indexes must be valid indexes into all arrays passed in +/// to this function, either directly or via metadata or provider structs. +#[inline(always)] +unsafe fn insert_or_barrier_update<A: HalApi>( + start_states: Option<&mut [BufferUses]>, + current_states: &mut [BufferUses], + resource_metadata: &mut ResourceMetadata<Buffer<A>>, + index: usize, + start_state_provider: BufferStateProvider<'_>, + end_state_provider: Option<BufferStateProvider<'_>>, + metadata_provider: ResourceMetadataProvider<'_, Buffer<A>>, + barriers: &mut Vec<PendingTransition<BufferUses>>, +) { + let currently_owned = unsafe { resource_metadata.contains_unchecked(index) }; + + if !currently_owned { + unsafe { + insert( + start_states, + current_states, + resource_metadata, + index, + start_state_provider, + end_state_provider, + metadata_provider, + ) + }; + return; + } + + let update_state_provider = end_state_provider.unwrap_or_else(|| start_state_provider.clone()); + unsafe { barrier(current_states, index, start_state_provider, barriers) }; + + unsafe { update(current_states, index, update_state_provider) }; +} + +#[inline(always)] +unsafe fn insert<A: HalApi>( + start_states: Option<&mut [BufferUses]>, + current_states: &mut [BufferUses], + resource_metadata: &mut ResourceMetadata<Buffer<A>>, + index: usize, + start_state_provider: BufferStateProvider<'_>, + end_state_provider: Option<BufferStateProvider<'_>>, + metadata_provider: ResourceMetadataProvider<'_, Buffer<A>>, +) { + let 
new_start_state = unsafe { start_state_provider.get_state(index) }; + let new_end_state = + end_state_provider.map_or(new_start_state, |p| unsafe { p.get_state(index) }); + + // This should only ever happen with a wgpu bug, but let's just double + // check that resource states don't have any conflicts. + strict_assert_eq!(invalid_resource_state(new_start_state), false); + strict_assert_eq!(invalid_resource_state(new_end_state), false); + + log::trace!("\tbuf {index}: insert {new_start_state:?}..{new_end_state:?}"); + + unsafe { + if let Some(&mut ref mut start_state) = start_states { + *start_state.get_unchecked_mut(index) = new_start_state; + } + *current_states.get_unchecked_mut(index) = new_end_state; + + let resource = metadata_provider.get_own(index); + resource_metadata.insert(index, resource); + } +} + +#[inline(always)] +unsafe fn merge<A: HalApi>( + current_states: &mut [BufferUses], + index32: u32, + index: usize, + state_provider: BufferStateProvider<'_>, + metadata_provider: ResourceMetadataProvider<'_, Buffer<A>>, +) -> Result<(), UsageConflict> { + let current_state = unsafe { current_states.get_unchecked_mut(index) }; + let new_state = unsafe { state_provider.get_state(index) }; + + let merged_state = *current_state | new_state; + + if invalid_resource_state(merged_state) { + return Err(UsageConflict::from_buffer( + BufferId::zip( + index32, + unsafe { metadata_provider.get_epoch(index) }, + A::VARIANT, + ), + *current_state, + new_state, + )); + } + + log::trace!("\tbuf {index32}: merge {current_state:?} + {new_state:?}"); + + *current_state = merged_state; + + Ok(()) +} + +#[inline(always)] +unsafe fn barrier( + current_states: &mut [BufferUses], + index: usize, + state_provider: BufferStateProvider<'_>, + barriers: &mut Vec<PendingTransition<BufferUses>>, +) { + let current_state = unsafe { *current_states.get_unchecked(index) }; + let new_state = unsafe { state_provider.get_state(index) }; + + if skip_barrier(current_state, new_state) { + return; 
+ } + + barriers.push(PendingTransition { + id: index as _, + selector: (), + usage: current_state..new_state, + }); + + log::trace!("\tbuf {index}: transition {current_state:?} -> {new_state:?}"); +} + +#[inline(always)] +unsafe fn update( + current_states: &mut [BufferUses], + index: usize, + state_provider: BufferStateProvider<'_>, +) { + let current_state = unsafe { current_states.get_unchecked_mut(index) }; + let new_state = unsafe { state_provider.get_state(index) }; + + *current_state = new_state; +} diff --git a/third_party/rust/wgpu-core/src/track/metadata.rs b/third_party/rust/wgpu-core/src/track/metadata.rs new file mode 100644 index 0000000000..e5f4d5e969 --- /dev/null +++ b/third_party/rust/wgpu-core/src/track/metadata.rs @@ -0,0 +1,243 @@ +//! The `ResourceMetadata` type. + +use crate::{resource::Resource, Epoch}; +use bit_vec::BitVec; +use std::{borrow::Cow, mem, sync::Arc}; +use wgt::strict_assert; + +/// A set of resources, holding a `Arc<T>` and epoch for each member. +/// +/// Testing for membership is fast, and iterating over members is +/// reasonably fast in practice. Storage consumption is proportional +/// to the largest id index of any member, not to the number of +/// members, but a bit vector tracks occupancy, so iteration touches +/// only occupied elements. +#[derive(Debug)] +pub(super) struct ResourceMetadata<T: Resource> { + /// If the resource with index `i` is a member, `owned[i]` is `true`. + owned: BitVec<usize>, + + /// A vector holding clones of members' `T`s. + resources: Vec<Option<Arc<T>>>, +} + +impl<T: Resource> ResourceMetadata<T> { + pub(super) fn new() -> Self { + Self { + owned: BitVec::default(), + resources: Vec::new(), + } + } + + /// Returns the number of indices we can accommodate. 
+ pub(super) fn size(&self) -> usize { + self.owned.len() + } + + pub(super) fn set_size(&mut self, size: usize) { + self.resources.resize(size, None); + resize_bitvec(&mut self.owned, size); + } + + /// Ensures a given index is in bounds for all arrays and does + /// sanity checks of the presence of a refcount. + /// + /// In release mode this function is completely empty and is removed. + #[cfg_attr(not(feature = "strict_asserts"), allow(unused_variables))] + pub(super) fn tracker_assert_in_bounds(&self, index: usize) { + strict_assert!(index < self.owned.len()); + strict_assert!(index < self.resources.len()); + strict_assert!(if self.contains(index) { + self.resources[index].is_some() + } else { + true + }); + } + + /// Returns true if the tracker owns no resources. + /// + /// This is a O(n) operation. + pub(super) fn is_empty(&self) -> bool { + !self.owned.any() + } + + /// Returns true if the set contains the resource with the given index. + pub(super) fn contains(&self, index: usize) -> bool { + self.owned[index] + } + + /// Returns true if the set contains the resource with the given index. + /// + /// # Safety + /// + /// The given `index` must be in bounds for this `ResourceMetadata`'s + /// existing tables. See `tracker_assert_in_bounds`. + #[inline(always)] + pub(super) unsafe fn contains_unchecked(&self, index: usize) -> bool { + unsafe { self.owned.get(index).unwrap_unchecked() } + } + + /// Insert a resource into the set. + /// + /// Add the resource with the given index, epoch, and reference count to the + /// set. + /// + /// # Safety + /// + /// The given `index` must be in bounds for this `ResourceMetadata`'s + /// existing tables. See `tracker_assert_in_bounds`. + #[inline(always)] + pub(super) unsafe fn insert(&mut self, index: usize, resource: Arc<T>) { + self.owned.set(index, true); + unsafe { + *self.resources.get_unchecked_mut(index) = Some(resource); + } + } + + /// Get the resource with the given index. 
+    ///
+    /// # Safety
+    ///
+    /// The given `index` must be in bounds for this `ResourceMetadata`'s
+    /// existing tables. See `tracker_assert_in_bounds`.
+    #[inline(always)]
+    pub(super) unsafe fn get_resource_unchecked(&self, index: usize) -> &Arc<T> {
+        unsafe {
+            self.resources
+                .get_unchecked(index)
+                .as_ref()
+                .unwrap_unchecked()
+        }
+    }
+
+    /// Get the reference count of the resource with the given index.
+    ///
+    /// # Safety
+    ///
+    /// The given `index` must be in bounds for this `ResourceMetadata`'s
+    /// existing tables. See `tracker_assert_in_bounds`.
+    #[inline(always)]
+    pub(super) unsafe fn get_ref_count_unchecked(&self, index: usize) -> usize {
+        unsafe { Arc::strong_count(self.get_resource_unchecked(index)) }
+    }
+
+    /// Returns an iterator over the resources owned by `self`.
+    pub(super) fn owned_resources(&self) -> impl Iterator<Item = Arc<T>> + '_ {
+        if !self.owned.is_empty() {
+            self.tracker_assert_in_bounds(self.owned.len() - 1)
+        };
+        iterate_bitvec_indices(&self.owned).map(move |index| {
+            let resource = unsafe { self.resources.get_unchecked(index) };
+            resource.as_ref().unwrap().clone()
+        })
+    }
+
+    /// Removes all resources owned by `self` and returns them in a vector,
+    /// leaving the set empty.
+    pub(super) fn drain_resources(&mut self) -> Vec<Arc<T>> {
+        if !self.owned.is_empty() {
+            self.tracker_assert_in_bounds(self.owned.len() - 1)
+        };
+        let mut resources = Vec::new();
+        iterate_bitvec_indices(&self.owned).for_each(|index| {
+            let resource = unsafe { self.resources.get_unchecked(index) };
+            resources.push(resource.as_ref().unwrap().clone());
+        });
+        self.owned.clear();
+        self.resources.clear();
+        resources
+    }
+
+    /// Returns an iterator over the indices of all resources owned by `self`.
+    pub(super) fn owned_indices(&self) -> impl Iterator<Item = usize> + '_ {
+        if !self.owned.is_empty() {
+            self.tracker_assert_in_bounds(self.owned.len() - 1)
+        };
+        iterate_bitvec_indices(&self.owned)
+    }
+
+    /// Remove the resource with the given index from the set.
+ pub(super) unsafe fn remove(&mut self, index: usize) { + unsafe { + *self.resources.get_unchecked_mut(index) = None; + } + self.owned.set(index, false); + } +} + +/// A source of resource metadata. +/// +/// This is used to abstract over the various places +/// trackers can get new resource metadata from. +pub(super) enum ResourceMetadataProvider<'a, T: Resource> { + /// Comes directly from explicit values. + Direct { resource: Cow<'a, Arc<T>> }, + /// Comes from another metadata tracker. + Indirect { metadata: &'a ResourceMetadata<T> }, +} +impl<T: Resource> ResourceMetadataProvider<'_, T> { + /// Get the epoch and an owned refcount from this. + /// + /// # Safety + /// + /// - The index must be in bounds of the metadata tracker if this uses an indirect source. + /// - info must be Some if this uses a Resource source. + #[inline(always)] + pub(super) unsafe fn get_own(self, index: usize) -> Arc<T> { + match self { + ResourceMetadataProvider::Direct { resource } => resource.into_owned(), + ResourceMetadataProvider::Indirect { metadata } => { + metadata.tracker_assert_in_bounds(index); + { + let resource = unsafe { metadata.resources.get_unchecked(index) }; + unsafe { resource.clone().unwrap_unchecked() } + } + } + } + } + /// Get the epoch from this. + /// + /// # Safety + /// + /// - The index must be in bounds of the metadata tracker if this uses an indirect source. + #[inline(always)] + pub(super) unsafe fn get_epoch(self, index: usize) -> Epoch { + unsafe { self.get_own(index).as_info().id().unzip().1 } + } +} + +/// Resizes the given bitvec to the given size. I'm not sure why this is hard to do but it is. +fn resize_bitvec<B: bit_vec::BitBlock>(vec: &mut BitVec<B>, size: usize) { + let owned_size_to_grow = size.checked_sub(vec.len()); + if let Some(delta) = owned_size_to_grow { + if delta != 0 { + vec.grow(delta, false); + } + } else { + vec.truncate(size); + } +} + +/// Produces an iterator that yields the indexes of all bits that are set in the bitvec. 
+///
+/// Will skip entire usize's worth of bits if they are all false.
+fn iterate_bitvec_indices(ownership: &BitVec<usize>) -> impl Iterator<Item = usize> + '_ {
+    const BITS_PER_BLOCK: usize = mem::size_of::<usize>() * 8;
+
+    let size = ownership.len();
+
+    ownership
+        .blocks()
+        .enumerate()
+        .filter(|&(_, word)| word != 0)
+        .flat_map(move |(word_index, mut word)| {
+            let bit_start = word_index * BITS_PER_BLOCK;
+            let bit_end = (bit_start + BITS_PER_BLOCK).min(size);
+
+            (bit_start..bit_end).filter(move |_| {
+                let active = word & 0b1 != 0;
+                word >>= 1;
+
+                active
+            })
+        })
+}
diff --git a/third_party/rust/wgpu-core/src/track/mod.rs b/third_party/rust/wgpu-core/src/track/mod.rs
new file mode 100644
index 0000000000..a36280d03b
--- /dev/null
+++ b/third_party/rust/wgpu-core/src/track/mod.rs
@@ -0,0 +1,617 @@
+/*! Resource State and Lifetime Trackers
+
+These structures are responsible for keeping track of resource state,
+generating barriers where needed, and making sure resources are kept
+alive until the trackers die.
+
+## General Architecture
+
+Tracking is some of the hottest code in the entire codebase, so the trackers
+are designed to be as cache efficient as possible. They store resource state
+in flat vectors, storing metadata SOA style, one vector per type of metadata.
+
+A lot of the tracker code is deeply unsafe, using unchecked accesses all over
+to make performance as good as possible. However, for all unsafe accesses, there
+is a corresponding debug assert that checks whether that access is valid. This helps
+get bugs caught fast, while still letting users not need to pay for the bounds
+checks.
+
+In wgpu, each resource ID includes a bitfield holding an index.
+Indices are allocated and re-used, so they will always be as low as
+reasonably possible. This allows us to use IDs to index into an array
+of tracking information.
+
+## Statefulness
+
+There are two main types of trackers, stateful and stateless.
+ +Stateful trackers are for buffers and textures. They both have +resource state attached to them which needs to be used to generate +automatic synchronization. Because of the different requirements of +buffers and textures, they have two separate tracking structures. + +Stateless trackers only store metadata and own the given resource. + +## Use Case + +Within each type of tracker, the trackers are further split into 3 different +use cases, Bind Group, Usage Scope, and a full Tracker. + +Bind Group trackers are just a list of different resources, their refcount, +and how they are used. Textures are used via a selector and a usage type. +Buffers by just a usage type. Stateless resources don't have a usage type. + +Usage Scope trackers are only for stateful resources. These trackers represent +a single [`UsageScope`] in the spec. When a use is added to a usage scope, +it is merged with all other uses of that resource in that scope. If there +is a usage conflict, merging will fail and an error will be reported. + +Full trackers represent a before and after state of a resource. These +are used for tracking on the device and on command buffers. The before +state represents the state the resource is first used as in the command buffer, +the after state is the state the command buffer leaves the resource in. +These double ended buffers can then be used to generate the needed transitions +between command buffers. + +## Dense Datastructure with Sparse Data + +This tracking system is based on having completely dense data, but trackers do +not always contain every resource. Some resources (or even most resources) go +unused in any given command buffer. So to help speed up the process of iterating +through possibly thousands of resources, we use a bit vector to represent if +a resource is in the buffer or not. This allows us extremely efficient memory +utilization, as well as being able to bail out of whole blocks of 32-64 resources +with a single usize comparison with zero. 
In practice this means that merging +partially resident buffers is extremely quick. + +The main advantage of this dense datastructure is that we can do merging +of trackers in an extremely efficient fashion that results in us doing linear +scans down a couple of buffers. CPUs and their caches absolutely eat this up. + +## Stateful Resource Operations + +All operations on stateful trackers boil down to one of four operations: +- `insert(tracker, new_state)` adds a resource with a given state to the tracker + for the first time. +- `merge(tracker, new_state)` merges this new state with the previous state, checking + for usage conflicts. +- `barrier(tracker, new_state)` compares the given state to the existing state and + generates the needed barriers. +- `update(tracker, new_state)` takes the given new state and overrides the old state. + +This allows us to compose the operations to form the various kinds of tracker merges +that need to happen in the codebase. For each resource in the given merger, the following +operation applies: + +```text +UsageScope <- Resource = insert(scope, usage) OR merge(scope, usage) +UsageScope <- UsageScope = insert(scope, scope) OR merge(scope, scope) +CommandBuffer <- UsageScope = insert(buffer.start, buffer.end, scope) + OR barrier(buffer.end, scope) + update(buffer.end, scope) +Device <- CommandBuffer = insert(device.start, device.end, buffer.start, buffer.end) + OR barrier(device.end, buffer.start) + update(device.end, buffer.end) +``` + +[`UsageScope`]: https://gpuweb.github.io/gpuweb/#programming-model-synchronization +*/ + +mod buffer; +mod metadata; +mod range; +mod stateless; +mod texture; + +use crate::{ + binding_model, command, conv, + hal_api::HalApi, + id::{self, Id}, + pipeline, resource, + snatch::SnatchGuard, + storage::Storage, +}; + +use parking_lot::RwLock; +use std::{fmt, ops}; +use thiserror::Error; + +pub(crate) use buffer::{BufferBindGroupState, BufferTracker, BufferUsageScope}; +use metadata::{ResourceMetadata, 
ResourceMetadataProvider}; +pub(crate) use stateless::{StatelessBindGroupSate, StatelessTracker}; +pub(crate) use texture::{ + TextureBindGroupState, TextureSelector, TextureTracker, TextureUsageScope, +}; +use wgt::strict_assert_ne; + +/// A structure containing all the information about a particular resource +/// transition. User code should be able to generate a pipeline barrier +/// based on the contents. +#[derive(Debug, PartialEq)] +pub(crate) struct PendingTransition<S: ResourceUses> { + pub id: u32, + pub selector: S::Selector, + pub usage: ops::Range<S>, +} + +pub(crate) type PendingTransitionList = Vec<PendingTransition<hal::TextureUses>>; + +impl PendingTransition<hal::BufferUses> { + /// Produce the hal barrier corresponding to the transition. + pub fn into_hal<'a, A: HalApi>( + self, + buf: &'a resource::Buffer<A>, + snatch_guard: &'a SnatchGuard<'a>, + ) -> hal::BufferBarrier<'a, A> { + let buffer = buf.raw.get(snatch_guard).expect("Buffer is destroyed"); + hal::BufferBarrier { + buffer, + usage: self.usage, + } + } +} + +impl PendingTransition<hal::TextureUses> { + /// Produce the hal barrier corresponding to the transition. 
+ pub fn into_hal<'a, A: HalApi>(self, texture: &'a A::Texture) -> hal::TextureBarrier<'a, A> { + // These showing up in a barrier is always a bug + strict_assert_ne!(self.usage.start, hal::TextureUses::UNKNOWN); + strict_assert_ne!(self.usage.end, hal::TextureUses::UNKNOWN); + + let mip_count = self.selector.mips.end - self.selector.mips.start; + strict_assert_ne!(mip_count, 0); + let layer_count = self.selector.layers.end - self.selector.layers.start; + strict_assert_ne!(layer_count, 0); + + hal::TextureBarrier { + texture, + range: wgt::ImageSubresourceRange { + aspect: wgt::TextureAspect::All, + base_mip_level: self.selector.mips.start, + mip_level_count: Some(mip_count), + base_array_layer: self.selector.layers.start, + array_layer_count: Some(layer_count), + }, + usage: self.usage, + } + } +} + +/// The uses that a resource or subresource can be in. +pub(crate) trait ResourceUses: + fmt::Debug + ops::BitAnd<Output = Self> + ops::BitOr<Output = Self> + PartialEq + Sized + Copy +{ + /// All flags that are exclusive. + const EXCLUSIVE: Self; + + /// The selector used by this resource. + type Selector: fmt::Debug; + + /// Turn the resource into a pile of bits. + fn bits(self) -> u16; + /// Returns true if the all the uses are ordered. + fn all_ordered(self) -> bool; + /// Returns true if any of the uses are exclusive. + fn any_exclusive(self) -> bool; +} + +/// Returns true if the given states violates the usage scope rule +/// of any(inclusive) XOR one(exclusive) +fn invalid_resource_state<T: ResourceUses>(state: T) -> bool { + // Is power of two also means "is one bit set". We check for this as if + // we're in any exclusive state, we must only be in a single state. + state.any_exclusive() && !conv::is_power_of_two_u16(state.bits()) +} + +/// Returns true if the transition from one state to another does not require +/// a barrier. 
+fn skip_barrier<T: ResourceUses>(old_state: T, new_state: T) -> bool { + // If the state didn't change and all the usages are ordered, the hardware + // will guarantee the order of accesses, so we do not need to issue a barrier at all + old_state == new_state && old_state.all_ordered() +} + +#[derive(Clone, Debug, Error, Eq, PartialEq)] +pub enum UsageConflict { + #[error("Attempted to use invalid buffer")] + BufferInvalid { id: id::BufferId }, + #[error("Attempted to use invalid texture")] + TextureInvalid { id: id::TextureId }, + #[error("Attempted to use buffer with {invalid_use}.")] + Buffer { + id: id::BufferId, + invalid_use: InvalidUse<hal::BufferUses>, + }, + #[error("Attempted to use a texture (mips {mip_levels:?} layers {array_layers:?}) with {invalid_use}.")] + Texture { + id: id::TextureId, + mip_levels: ops::Range<u32>, + array_layers: ops::Range<u32>, + invalid_use: InvalidUse<hal::TextureUses>, + }, +} + +impl UsageConflict { + fn from_buffer( + id: id::BufferId, + current_state: hal::BufferUses, + new_state: hal::BufferUses, + ) -> Self { + Self::Buffer { + id, + invalid_use: InvalidUse { + current_state, + new_state, + }, + } + } + + fn from_texture( + id: id::TextureId, + selector: TextureSelector, + current_state: hal::TextureUses, + new_state: hal::TextureUses, + ) -> Self { + Self::Texture { + id, + mip_levels: selector.mips, + array_layers: selector.layers, + invalid_use: InvalidUse { + current_state, + new_state, + }, + } + } +} + +impl crate::error::PrettyError for UsageConflict { + fn fmt_pretty(&self, fmt: &mut crate::error::ErrorFormatter) { + fmt.error(self); + match *self { + Self::BufferInvalid { id } => { + fmt.buffer_label(&id); + } + Self::TextureInvalid { id } => { + fmt.texture_label(&id); + } + Self::Buffer { id, .. } => { + fmt.buffer_label(&id); + } + Self::Texture { id, .. } => { + fmt.texture_label(&id); + } + } + } +} + +/// Pretty print helper that shows helpful descriptions of a conflicting usage. 
+#[derive(Clone, Debug, Eq, PartialEq)] +pub struct InvalidUse<T> { + current_state: T, + new_state: T, +} + +impl<T: ResourceUses> fmt::Display for InvalidUse<T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let current = self.current_state; + let new = self.new_state; + + let current_exclusive = current & T::EXCLUSIVE; + let new_exclusive = new & T::EXCLUSIVE; + + let exclusive = current_exclusive | new_exclusive; + + // The text starts with "tried to use X resource with {self}" + write!( + f, + "conflicting usages. Current usage {current:?} and new usage {new:?}. \ + {exclusive:?} is an exclusive usage and cannot be used with any other \ + usages within the usage scope (renderpass or compute dispatch)" + ) + } +} + +/// All the usages that a bind group contains. The uses are not deduplicated in any way +/// and may include conflicting uses. This is fully compliant by the WebGPU spec. +/// +/// All bind group states are sorted by their ID so that when adding to a tracker, +/// they are added in the most efficient order possible (ascending order). +#[derive(Debug)] +pub(crate) struct BindGroupStates<A: HalApi> { + pub buffers: BufferBindGroupState<A>, + pub textures: TextureBindGroupState<A>, + pub views: StatelessBindGroupSate<resource::TextureView<A>>, + pub samplers: StatelessBindGroupSate<resource::Sampler<A>>, +} + +impl<A: HalApi> BindGroupStates<A> { + pub fn new() -> Self { + Self { + buffers: BufferBindGroupState::new(), + textures: TextureBindGroupState::new(), + views: StatelessBindGroupSate::new(), + samplers: StatelessBindGroupSate::new(), + } + } + + /// Optimize the bind group states by sorting them by ID. + /// + /// When this list of states is merged into a tracker, the memory + /// accesses will be in a constant ascending order. + pub fn optimize(&mut self) { + self.buffers.optimize(); + self.textures.optimize(); + self.views.optimize(); + self.samplers.optimize(); + } +} + +/// This is a render bundle specific usage scope. 
It includes stateless resources +/// that are not normally included in a usage scope, but are used by render bundles +/// and need to be owned by the render bundles. +#[derive(Debug)] +pub(crate) struct RenderBundleScope<A: HalApi> { + pub buffers: RwLock<BufferUsageScope<A>>, + pub textures: RwLock<TextureUsageScope<A>>, + // Don't need to track views and samplers, they are never used directly, only by bind groups. + pub bind_groups: RwLock<StatelessTracker<binding_model::BindGroup<A>>>, + pub render_pipelines: RwLock<StatelessTracker<pipeline::RenderPipeline<A>>>, + pub query_sets: RwLock<StatelessTracker<resource::QuerySet<A>>>, +} + +impl<A: HalApi> RenderBundleScope<A> { + /// Create the render bundle scope and pull the maximum IDs from the hubs. + pub fn new( + buffers: &Storage<resource::Buffer<A>>, + textures: &Storage<resource::Texture<A>>, + bind_groups: &Storage<binding_model::BindGroup<A>>, + render_pipelines: &Storage<pipeline::RenderPipeline<A>>, + query_sets: &Storage<resource::QuerySet<A>>, + ) -> Self { + let value = Self { + buffers: RwLock::new(BufferUsageScope::new()), + textures: RwLock::new(TextureUsageScope::new()), + bind_groups: RwLock::new(StatelessTracker::new()), + render_pipelines: RwLock::new(StatelessTracker::new()), + query_sets: RwLock::new(StatelessTracker::new()), + }; + + value.buffers.write().set_size(buffers.len()); + value.textures.write().set_size(textures.len()); + value.bind_groups.write().set_size(bind_groups.len()); + value + .render_pipelines + .write() + .set_size(render_pipelines.len()); + value.query_sets.write().set_size(query_sets.len()); + + value + } + + /// Merge the inner contents of a bind group into the render bundle tracker. + /// + /// Only stateful things are merged in here, all other resources are owned + /// indirectly by the bind group. + /// + /// # Safety + /// + /// The maximum ID given by each bind group resource must be less than the + /// length of the storage given at the call to `new`. 
+ pub unsafe fn merge_bind_group( + &mut self, + bind_group: &BindGroupStates<A>, + ) -> Result<(), UsageConflict> { + unsafe { self.buffers.write().merge_bind_group(&bind_group.buffers)? }; + unsafe { + self.textures + .write() + .merge_bind_group(&bind_group.textures)? + }; + + Ok(()) + } +} + +/// A usage scope tracker. Only needs to store stateful resources as stateless +/// resources cannot possibly have a usage conflict. +#[derive(Debug)] +pub(crate) struct UsageScope<A: HalApi> { + pub buffers: BufferUsageScope<A>, + pub textures: TextureUsageScope<A>, +} + +impl<A: HalApi> UsageScope<A> { + /// Create the render bundle scope and pull the maximum IDs from the hubs. + pub fn new( + buffers: &Storage<resource::Buffer<A>>, + textures: &Storage<resource::Texture<A>>, + ) -> Self { + let mut value = Self { + buffers: BufferUsageScope::new(), + textures: TextureUsageScope::new(), + }; + + value.buffers.set_size(buffers.len()); + value.textures.set_size(textures.len()); + + value + } + + /// Merge the inner contents of a bind group into the usage scope. + /// + /// Only stateful things are merged in here, all other resources are owned + /// indirectly by the bind group. + /// + /// # Safety + /// + /// The maximum ID given by each bind group resource must be less than the + /// length of the storage given at the call to `new`. + pub unsafe fn merge_bind_group( + &mut self, + bind_group: &BindGroupStates<A>, + ) -> Result<(), UsageConflict> { + unsafe { + self.buffers.merge_bind_group(&bind_group.buffers)?; + self.textures.merge_bind_group(&bind_group.textures)?; + } + + Ok(()) + } + + /// Merge the inner contents of a bind group into the usage scope. + /// + /// Only stateful things are merged in here, all other resources are owned + /// indirectly by a bind group or are merged directly into the command buffer tracker. 
+ /// + /// # Safety + /// + /// The maximum ID given by each bind group resource must be less than the + /// length of the storage given at the call to `new`. + pub unsafe fn merge_render_bundle( + &mut self, + render_bundle: &RenderBundleScope<A>, + ) -> Result<(), UsageConflict> { + self.buffers + .merge_usage_scope(&*render_bundle.buffers.read())?; + self.textures + .merge_usage_scope(&*render_bundle.textures.read())?; + + Ok(()) + } +} + +pub(crate) trait ResourceTracker<R> +where + R: resource::Resource, +{ + fn remove_abandoned(&mut self, id: Id<R::Marker>) -> bool; +} + +/// A full double sided tracker used by CommandBuffers and the Device. +pub(crate) struct Tracker<A: HalApi> { + pub buffers: BufferTracker<A>, + pub textures: TextureTracker<A>, + pub views: StatelessTracker<resource::TextureView<A>>, + pub samplers: StatelessTracker<resource::Sampler<A>>, + pub bind_groups: StatelessTracker<binding_model::BindGroup<A>>, + pub compute_pipelines: StatelessTracker<pipeline::ComputePipeline<A>>, + pub render_pipelines: StatelessTracker<pipeline::RenderPipeline<A>>, + pub bundles: StatelessTracker<command::RenderBundle<A>>, + pub query_sets: StatelessTracker<resource::QuerySet<A>>, +} + +impl<A: HalApi> Tracker<A> { + pub fn new() -> Self { + Self { + buffers: BufferTracker::new(), + textures: TextureTracker::new(), + views: StatelessTracker::new(), + samplers: StatelessTracker::new(), + bind_groups: StatelessTracker::new(), + compute_pipelines: StatelessTracker::new(), + render_pipelines: StatelessTracker::new(), + bundles: StatelessTracker::new(), + query_sets: StatelessTracker::new(), + } + } + + /// Pull the maximum IDs from the hubs. 
+ pub fn set_size( + &mut self, + buffers: Option<&Storage<resource::Buffer<A>>>, + textures: Option<&Storage<resource::Texture<A>>>, + views: Option<&Storage<resource::TextureView<A>>>, + samplers: Option<&Storage<resource::Sampler<A>>>, + bind_groups: Option<&Storage<binding_model::BindGroup<A>>>, + compute_pipelines: Option<&Storage<pipeline::ComputePipeline<A>>>, + render_pipelines: Option<&Storage<pipeline::RenderPipeline<A>>>, + bundles: Option<&Storage<command::RenderBundle<A>>>, + query_sets: Option<&Storage<resource::QuerySet<A>>>, + ) { + if let Some(buffers) = buffers { + self.buffers.set_size(buffers.len()); + }; + if let Some(textures) = textures { + self.textures.set_size(textures.len()); + }; + if let Some(views) = views { + self.views.set_size(views.len()); + }; + if let Some(samplers) = samplers { + self.samplers.set_size(samplers.len()); + }; + if let Some(bind_groups) = bind_groups { + self.bind_groups.set_size(bind_groups.len()); + }; + if let Some(compute_pipelines) = compute_pipelines { + self.compute_pipelines.set_size(compute_pipelines.len()); + } + if let Some(render_pipelines) = render_pipelines { + self.render_pipelines.set_size(render_pipelines.len()); + }; + if let Some(bundles) = bundles { + self.bundles.set_size(bundles.len()); + }; + if let Some(query_sets) = query_sets { + self.query_sets.set_size(query_sets.len()); + }; + } + + /// Iterates through all resources in the given bind group and adopts + /// the state given for those resources in the UsageScope. It also + /// removes all touched resources from the usage scope. + /// + /// If a transition is needed to get the resources into the needed + /// state, those transitions are stored within the tracker. A + /// subsequent call to [`BufferTracker::drain`] or + /// [`TextureTracker::drain`] is needed to get those transitions. 
+    ///
+    /// This is a really funky method used by Compute Passes to generate
+    /// barriers after a call to dispatch without needing to iterate
+    /// over all elements in the usage scope. We use each
+    /// bind group as a source of which IDs to look at. The bind groups
+    /// must have first been added to the usage scope.
+    ///
+    /// Only stateful things are merged in here, all other resources are owned
+    /// indirectly by the bind group.
+    ///
+    /// # Safety
+    ///
+    /// The maximum ID given by each bind group resource must be less than the
+    /// value given to `set_size`
+    pub unsafe fn set_and_remove_from_usage_scope_sparse(
+        &mut self,
+        scope: &mut UsageScope<A>,
+        bind_group: &BindGroupStates<A>,
+    ) {
+        unsafe {
+            self.buffers.set_and_remove_from_usage_scope_sparse(
+                &mut scope.buffers,
+                bind_group.buffers.used_ids(),
+            )
+        };
+        unsafe {
+            self.textures
+                .set_and_remove_from_usage_scope_sparse(&mut scope.textures, &bind_group.textures)
+        };
+    }
+
+    /// Tracks the stateless resources from the given renderbundle. It is expected
+    /// that the stateful resources will get merged into a usage scope first.
+    ///
+    /// # Safety
+    ///
+    /// The maximum ID given by each bind group resource must be less than the
+    /// value given to `set_size`
+    pub unsafe fn add_from_render_bundle(
+        &mut self,
+        render_bundle: &RenderBundleScope<A>,
+    ) -> Result<(), UsageConflict> {
+        self.bind_groups
+            .add_from_tracker(&*render_bundle.bind_groups.read());
+        self.render_pipelines
+            .add_from_tracker(&*render_bundle.render_pipelines.read());
+        self.query_sets
+            .add_from_tracker(&*render_bundle.query_sets.read());
+
+        Ok(())
+    }
+}
diff --git a/third_party/rust/wgpu-core/src/track/range.rs b/third_party/rust/wgpu-core/src/track/range.rs
new file mode 100644
index 0000000000..3961220c2c
--- /dev/null
+++ b/third_party/rust/wgpu-core/src/track/range.rs
@@ -0,0 +1,206 @@
+//Note: this could be the only place where we need `SmallVec`.
+//TODO: consider getting rid of it.
+use smallvec::SmallVec;
+
+use std::{fmt::Debug, iter, ops::Range};
+
+/// Structure that keeps track of an I -> T mapping,
+/// optimized for a case where keys of the same values
+/// are often grouped together linearly.
+#[derive(Clone, Debug, PartialEq)]
+pub(crate) struct RangedStates<I, T> {
+    /// List of ranges, each associated with a single value.
+    /// Ranges of keys have to be non-intersecting and ordered.
+    ranges: SmallVec<[(Range<I>, T); 1]>,
+}
+
+impl<I: Copy + Ord, T: Copy + PartialEq> RangedStates<I, T> {
+    pub fn from_range(range: Range<I>, value: T) -> Self {
+        Self {
+            ranges: iter::once((range, value)).collect(),
+        }
+    }
+
+    /// Construct a new instance from a slice of ranges.
+    #[cfg(test)]
+    pub fn from_slice(values: &[(Range<I>, T)]) -> Self {
+        Self {
+            ranges: values.iter().cloned().collect(),
+        }
+    }
+
+    pub fn iter(&self) -> impl Iterator<Item = &(Range<I>, T)> + Clone {
+        self.ranges.iter()
+    }
+
+    pub fn iter_mut(&mut self) -> impl Iterator<Item = &mut (Range<I>, T)> {
+        self.ranges.iter_mut()
+    }
+
+    /// Check that all the ranges are non-intersecting and ordered.
+    /// Panics otherwise.
+    #[cfg(test)]
+    fn check_sanity(&self) {
+        for a in self.ranges.iter() {
+            assert!(a.0.start < a.0.end);
+        }
+        for (a, b) in self.ranges.iter().zip(self.ranges[1..].iter()) {
+            assert!(a.0.end <= b.0.start);
+        }
+    }
+
+    /// Merge the neighboring ranges together, where possible.
+ pub fn coalesce(&mut self) { + let mut num_removed = 0; + let mut iter = self.ranges.iter_mut(); + let mut cur = match iter.next() { + Some(elem) => elem, + None => return, + }; + for next in iter { + if cur.0.end == next.0.start && cur.1 == next.1 { + num_removed += 1; + cur.0.end = next.0.end; + next.0.end = next.0.start; + } else { + cur = next; + } + } + if num_removed != 0 { + self.ranges.retain(|pair| pair.0.start != pair.0.end); + } + } + + pub fn iter_filter<'a>( + &'a self, + range: &'a Range<I>, + ) -> impl Iterator<Item = (Range<I>, &T)> + 'a { + self.ranges + .iter() + .filter(move |&(inner, ..)| inner.end > range.start && inner.start < range.end) + .map(move |(inner, v)| { + let new_range = inner.start.max(range.start)..inner.end.min(range.end); + + (new_range, v) + }) + } + + /// Split the storage ranges in such a way that there is a linear subset of + /// them occupying exactly `index` range, which is returned mutably. + /// + /// Gaps in the ranges are filled with `default` value. + pub fn isolate(&mut self, index: &Range<I>, default: T) -> &mut [(Range<I>, T)] { + //TODO: implement this in 2 passes: + // 1. scan the ranges to figure out how many extra ones need to be inserted + // 2. 
go through the ranges by moving them them to the right and inserting the missing ones + + let mut start_pos = match self.ranges.iter().position(|pair| pair.0.end > index.start) { + Some(pos) => pos, + None => { + let pos = self.ranges.len(); + self.ranges.push((index.clone(), default)); + return &mut self.ranges[pos..]; + } + }; + + { + let (range, value) = self.ranges[start_pos].clone(); + if range.start < index.start { + self.ranges[start_pos].0.start = index.start; + self.ranges + .insert(start_pos, (range.start..index.start, value)); + start_pos += 1; + } + } + let mut pos = start_pos; + let mut range_pos = index.start; + loop { + let (range, value) = self.ranges[pos].clone(); + if range.start >= index.end { + self.ranges.insert(pos, (range_pos..index.end, default)); + pos += 1; + break; + } + if range.start > range_pos { + self.ranges.insert(pos, (range_pos..range.start, default)); + pos += 1; + range_pos = range.start; + } + if range.end >= index.end { + if range.end != index.end { + self.ranges[pos].0.start = index.end; + self.ranges.insert(pos, (range_pos..index.end, value)); + } + pos += 1; + break; + } + pos += 1; + range_pos = range.end; + if pos == self.ranges.len() { + self.ranges.push((range_pos..index.end, default)); + pos += 1; + break; + } + } + + &mut self.ranges[start_pos..pos] + } + + /// Helper method for isolation that checks the sanity of the results. 
+ #[cfg(test)] + pub fn sanely_isolated(&self, index: Range<I>, default: T) -> Vec<(Range<I>, T)> { + let mut clone = self.clone(); + let result = clone.isolate(&index, default).to_vec(); + clone.check_sanity(); + result + } +} + +#[cfg(test)] +mod test { + //TODO: randomized/fuzzy testing + use super::RangedStates; + + #[test] + fn sane_good() { + let rs = RangedStates::from_slice(&[(1..4, 9u8), (4..5, 9)]); + rs.check_sanity(); + } + + #[test] + #[should_panic] + fn sane_empty() { + let rs = RangedStates::from_slice(&[(1..4, 9u8), (5..5, 9)]); + rs.check_sanity(); + } + + #[test] + #[should_panic] + fn sane_intersect() { + let rs = RangedStates::from_slice(&[(1..4, 9u8), (3..5, 9)]); + rs.check_sanity(); + } + + #[test] + fn coalesce() { + let mut rs = RangedStates::from_slice(&[(1..4, 9u8), (4..5, 9), (5..7, 1), (8..9, 1)]); + rs.coalesce(); + rs.check_sanity(); + assert_eq!(rs.ranges.as_slice(), &[(1..5, 9), (5..7, 1), (8..9, 1),]); + } + + #[test] + fn isolate() { + let rs = RangedStates::from_slice(&[(1..4, 9u8), (4..5, 9), (5..7, 1), (8..9, 1)]); + assert_eq!(&rs.sanely_isolated(4..5, 0), &[(4..5, 9u8),]); + assert_eq!( + &rs.sanely_isolated(0..6, 0), + &[(0..1, 0), (1..4, 9u8), (4..5, 9), (5..6, 1),] + ); + assert_eq!(&rs.sanely_isolated(8..10, 1), &[(8..9, 1), (9..10, 1),]); + assert_eq!( + &rs.sanely_isolated(6..9, 0), + &[(6..7, 1), (7..8, 0), (8..9, 1),] + ); + } +} diff --git a/third_party/rust/wgpu-core/src/track/stateless.rs b/third_party/rust/wgpu-core/src/track/stateless.rs new file mode 100644 index 0000000000..4111a90f79 --- /dev/null +++ b/third_party/rust/wgpu-core/src/track/stateless.rs @@ -0,0 +1,238 @@ +/*! Stateless Trackers + * + * Stateless trackers don't have any state, so make no + * distinction between a usage scope and a full tracker. 
+!*/ + +use std::sync::Arc; + +use parking_lot::Mutex; + +use crate::{id::Id, resource::Resource, resource_log, storage::Storage, track::ResourceMetadata}; + +use super::ResourceTracker; + +/// Satisfy clippy. +type Pair<T> = (Id<<T as Resource>::Marker>, Arc<T>); + +/// Stores all the resources that a bind group stores. +#[derive(Debug)] +pub(crate) struct StatelessBindGroupSate<T: Resource> { + resources: Mutex<Vec<Pair<T>>>, +} + +impl<T: Resource> StatelessBindGroupSate<T> { + pub fn new() -> Self { + Self { + resources: Mutex::new(Vec::new()), + } + } + + /// Optimize the buffer bind group state by sorting it by ID. + /// + /// When this list of states is merged into a tracker, the memory + /// accesses will be in a constant ascending order. + pub(crate) fn optimize(&self) { + let mut resources = self.resources.lock(); + resources.sort_unstable_by_key(|&(id, _)| id.unzip().0); + } + + /// Returns a list of all resources tracked. May contain duplicates. + pub fn used_resources(&self) -> impl Iterator<Item = Arc<T>> + '_ { + let resources = self.resources.lock(); + resources + .iter() + .map(|(_, resource)| resource.clone()) + .collect::<Vec<_>>() + .into_iter() + } + + /// Returns a list of all resources tracked. May contain duplicates. + pub fn drain_resources(&self) -> impl Iterator<Item = Arc<T>> + '_ { + let mut resources = self.resources.lock(); + resources + .drain(..) + .map(|(_, r)| r) + .collect::<Vec<_>>() + .into_iter() + } + + /// Adds the given resource. + pub fn add_single<'a>(&self, storage: &'a Storage<T>, id: Id<T::Marker>) -> Option<&'a T> { + let resource = storage.get(id).ok()?; + + let mut resources = self.resources.lock(); + resources.push((id, resource.clone())); + + Some(resource) + } +} + +/// Stores all resource state within a command buffer or device. 
#[derive(Debug)]
pub(crate) struct StatelessTracker<T: Resource> {
    metadata: ResourceMetadata<T>,
}

impl<T: Resource> ResourceTracker<T> for StatelessTracker<T> {
    /// Try to remove the given resource from the tracker iff we have the last reference to the
    /// resource and the epoch matches.
    ///
    /// Returns true if the resource was removed or if not existing in metadata.
    ///
    /// If the ID is higher than the length of internal vectors,
    /// false will be returned.
    fn remove_abandoned(&mut self, id: Id<T::Marker>) -> bool {
        let index = id.unzip().0 as usize;

        if index >= self.metadata.size() {
            return false;
        }

        resource_log!("StatelessTracker::remove_abandoned {id:?}");

        self.tracker_assert_in_bounds(index);

        unsafe {
            if self.metadata.contains_unchecked(index) {
                let existing_ref_count = self.metadata.get_ref_count_unchecked(index);
                //RefCount 2 means that resource is hold just by DeviceTracker and this suspected resource itself
                //so it's already been released from user and so it's not inside Registry\Storage
                if existing_ref_count <= 2 {
                    self.metadata.remove(index);
                    log::trace!("{} {:?} is not tracked anymore", T::TYPE, id,);
                    return true;
                } else {
                    log::trace!(
                        "{} {:?} is still referenced from {}",
                        T::TYPE,
                        id,
                        existing_ref_count
                    );
                    return false;
                }
            }
        }
        true
    }
}

impl<T: Resource> StatelessTracker<T> {
    pub fn new() -> Self {
        Self {
            metadata: ResourceMetadata::new(),
        }
    }

    fn tracker_assert_in_bounds(&self, index: usize) {
        self.metadata.tracker_assert_in_bounds(index);
    }

    /// Sets the size of all the vectors inside the tracker.
    ///
    /// Must be called with the highest possible Resource ID of this type
    /// before all unsafe functions are called.
    pub fn set_size(&mut self, size: usize) {
        self.metadata.set_size(size);
    }

    /// Extend the vectors to let the given index be valid.
    fn allow_index(&mut self, index: usize) {
        if index >= self.metadata.size() {
            self.set_size(index + 1);
        }
    }

    /// Returns a list of all resources tracked.
    pub fn used_resources(&self) -> impl Iterator<Item = Arc<T>> + '_ {
        self.metadata.owned_resources()
    }

    /// Returns a list of all resources tracked.
    pub fn drain_resources(&mut self) -> impl Iterator<Item = Arc<T>> + '_ {
        let resources = self.metadata.drain_resources();
        resources.into_iter()
    }

    /// Inserts a single resource into the resource tracker.
    ///
    /// If the resource already exists in the tracker, it will be overwritten.
    ///
    /// If the ID is higher than the length of internal vectors,
    /// the vectors will be extended. A call to set_size is not needed.
    pub fn insert_single(&mut self, id: Id<T::Marker>, resource: Arc<T>) {
        let (index32, _epoch, _) = id.unzip();
        let index = index32 as usize;

        self.allow_index(index);

        self.tracker_assert_in_bounds(index);

        // SAFETY: `allow_index` above guarantees `index` is within the metadata vectors.
        unsafe {
            self.metadata.insert(index, resource);
        }
    }

    /// Adds the given resource to the tracker.
    ///
    /// If the ID is higher than the length of internal vectors,
    /// the vectors will be extended. A call to set_size is not needed.
    pub fn add_single<'a>(
        &mut self,
        storage: &'a Storage<T>,
        id: Id<T::Marker>,
    ) -> Option<&'a Arc<T>> {
        let resource = storage.get(id).ok()?;

        let (index32, _epoch, _) = id.unzip();
        let index = index32 as usize;

        self.allow_index(index);

        self.tracker_assert_in_bounds(index);

        // SAFETY: `allow_index` above guarantees `index` is within the metadata vectors.
        unsafe {
            self.metadata.insert(index, resource.clone());
        }

        Some(resource)
    }

    /// Adds the given resources from the given tracker.
    ///
    /// If the ID is higher than the length of internal vectors,
    /// the vectors will be extended. A call to set_size is not needed.
    pub fn add_from_tracker(&mut self, other: &Self) {
        let incoming_size = other.metadata.size();
        if incoming_size > self.metadata.size() {
            self.set_size(incoming_size);
        }

        for index in other.metadata.owned_indices() {
            self.tracker_assert_in_bounds(index);
            other.tracker_assert_in_bounds(index);
            // SAFETY: both trackers were just asserted to cover `index`.
            unsafe {
                let previously_owned = self.metadata.contains_unchecked(index);

                if !previously_owned {
                    let other_resource = other.metadata.get_resource_unchecked(index);
                    self.metadata.insert(index, other_resource.clone());
                }
            }
        }
    }

    /// Looks up the resource with the given ID, returning it if tracked.
    pub fn get(&self, id: Id<T::Marker>) -> Option<&Arc<T>> {
        let index = id.unzip().0 as usize;
        // Fixed off-by-one: `index == size` is already out of bounds, so the
        // guard must reject it as well (this matches the `>=` check used by
        // `remove_abandoned` above). The old `>` guard let `index == size`
        // reach the unchecked metadata accesses below.
        if index >= self.metadata.size() {
            return None;
        }
        self.tracker_assert_in_bounds(index);
        unsafe {
            if self.metadata.contains_unchecked(index) {
                return Some(self.metadata.get_resource_unchecked(index));
            }
        }
        None
    }
}
diff --git a/third_party/rust/wgpu-core/src/track/texture.rs b/third_party/rust/wgpu-core/src/track/texture.rs
new file mode 100644
index 0000000000..601df11e1b
--- /dev/null
+++ b/third_party/rust/wgpu-core/src/track/texture.rs
@@ -0,0 +1,1492 @@
/*! Texture Trackers
 *
 * Texture trackers are significantly more complicated than
 * the buffer trackers because textures can be in a "complex"
 * state where each individual subresource can potentially be
 * in a different state from every other subresource. These
 * complex states are stored separately from the simple states
 * because they are significantly more difficult to track and
 * most resources spend the vast majority of their lives in
 * simple states.
 *
 * There are two special texture usages: `UNKNOWN` and `UNINITIALIZED`.
 * - `UNKNOWN` is only used in complex states and is used to signify
 *   that the complex state does not know anything about those subresources.
 *   It cannot leak into transitions, it is invalid to transition into UNKNOWN
 *   state.
+ * - `UNINITIALIZED` is used in both simple and complex states to mean the texture + * is known to be in some undefined state. Any transition away from UNINITIALIZED + * will treat the contents as junk. +!*/ + +use super::{range::RangedStates, PendingTransition, PendingTransitionList, ResourceTracker}; +use crate::{ + hal_api::HalApi, + id::TextureId, + resource::{Resource, Texture, TextureInner}, + snatch::SnatchGuard, + track::{ + invalid_resource_state, skip_barrier, ResourceMetadata, ResourceMetadataProvider, + ResourceUses, UsageConflict, + }, +}; +use hal::TextureUses; + +use arrayvec::ArrayVec; +use naga::FastHashMap; + +use parking_lot::Mutex; +use wgt::{strict_assert, strict_assert_eq}; + +use std::{borrow::Cow, iter, marker::PhantomData, ops::Range, sync::Arc, vec::Drain}; + +/// Specifies a particular set of subresources in a texture. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct TextureSelector { + pub mips: Range<u32>, + pub layers: Range<u32>, +} + +impl ResourceUses for TextureUses { + const EXCLUSIVE: Self = Self::EXCLUSIVE; + + type Selector = TextureSelector; + + fn bits(self) -> u16 { + Self::bits(&self) + } + + fn all_ordered(self) -> bool { + Self::ORDERED.contains(self) + } + + fn any_exclusive(self) -> bool { + self.intersects(Self::EXCLUSIVE) + } +} + +/// Represents the complex state of textures where every subresource is potentially +/// in a different state. +#[derive(Clone, Debug, Default, PartialEq)] +struct ComplexTextureState { + mips: ArrayVec<RangedStates<u32, TextureUses>, { hal::MAX_MIP_LEVELS as usize }>, +} + +impl ComplexTextureState { + /// Creates complex texture state for the given sizes. + /// + /// This state will be initialized with the UNKNOWN state, a special state + /// which means the trakcer knows nothing about the state. 
+ fn new(mip_level_count: u32, array_layer_count: u32) -> Self { + Self { + mips: iter::repeat_with(|| { + RangedStates::from_range(0..array_layer_count, TextureUses::UNKNOWN) + }) + .take(mip_level_count as usize) + .collect(), + } + } + + /// Initialize a complex state from a selector representing the full size of the texture + /// and an iterator of a selector and a texture use, specifying a usage for a specific + /// set of subresources. + /// + /// [`Self::to_selector_state_iter`] can be used to create such an iterator. + /// + /// # Safety + /// + /// All selectors in the iterator must be inside of the full_range selector. + /// + /// The full range selector must have mips and layers start at 0. + unsafe fn from_selector_state_iter( + full_range: TextureSelector, + state_iter: impl Iterator<Item = (TextureSelector, TextureUses)>, + ) -> Self { + strict_assert_eq!(full_range.layers.start, 0); + strict_assert_eq!(full_range.mips.start, 0); + + let mut complex = + ComplexTextureState::new(full_range.mips.len() as u32, full_range.layers.len() as u32); + for (selector, desired_state) in state_iter { + strict_assert!(selector.layers.end <= full_range.layers.end); + strict_assert!(selector.mips.end <= full_range.mips.end); + + // This should only ever happen with a wgpu bug, but let's just double + // check that resource states don't have any conflicts. + strict_assert_eq!(invalid_resource_state(desired_state), false); + + let mips = selector.mips.start as usize..selector.mips.end as usize; + for mip in unsafe { complex.mips.get_unchecked_mut(mips) } { + for &mut (_, ref mut state) in mip.isolate(&selector.layers, TextureUses::UNKNOWN) { + *state = desired_state; + } + } + } + complex + } + + /// Convert a complex state into an iterator over all states stored. + /// + /// [`Self::from_selector_state_iter`] can be used to consume such an iterator. 
+ fn to_selector_state_iter( + &self, + ) -> impl Iterator<Item = (TextureSelector, TextureUses)> + Clone + '_ { + self.mips.iter().enumerate().flat_map(|(mip, inner)| { + let mip = mip as u32; + { + inner.iter().map(move |&(ref layers, inner)| { + ( + TextureSelector { + mips: mip..mip + 1, + layers: layers.clone(), + }, + inner, + ) + }) + } + }) + } +} + +#[derive(Debug)] +struct TextureBindGroupStateData<A: HalApi> { + selector: Option<TextureSelector>, + texture: Arc<Texture<A>>, + usage: TextureUses, +} + +/// Stores all the textures that a bind group stores. +#[derive(Debug)] +pub(crate) struct TextureBindGroupState<A: HalApi> { + textures: Mutex<Vec<TextureBindGroupStateData<A>>>, +} +impl<A: HalApi> TextureBindGroupState<A> { + pub fn new() -> Self { + Self { + textures: Mutex::new(Vec::new()), + } + } + + /// Optimize the texture bind group state by sorting it by ID. + /// + /// When this list of states is merged into a tracker, the memory + /// accesses will be in a constant ascending order. + pub(crate) fn optimize(&self) { + let mut textures = self.textures.lock(); + textures.sort_unstable_by_key(|v| v.texture.as_info().id().unzip().0); + } + + /// Returns a list of all textures tracked. May contain duplicates. + pub fn drain_resources(&self) -> impl Iterator<Item = Arc<Texture<A>>> + '_ { + let mut textures = self.textures.lock(); + textures + .drain(..) + .map(|v| v.texture) + .collect::<Vec<_>>() + .into_iter() + } + + /// Adds the given resource with the given state. + pub fn add_single<'a>( + &self, + texture: &'a Arc<Texture<A>>, + selector: Option<TextureSelector>, + state: TextureUses, + ) -> Option<&'a Arc<Texture<A>>> { + let mut textures = self.textures.lock(); + textures.push(TextureBindGroupStateData { + selector, + texture: texture.clone(), + usage: state, + }); + Some(texture) + } +} + +/// Container for corresponding simple and complex texture states. 
+#[derive(Debug)] +pub(crate) struct TextureStateSet { + simple: Vec<TextureUses>, + complex: FastHashMap<usize, ComplexTextureState>, +} +impl TextureStateSet { + fn new() -> Self { + Self { + simple: Vec::new(), + complex: FastHashMap::default(), + } + } + + fn clear(&mut self) { + self.simple.clear(); + self.complex.clear(); + } + + fn set_size(&mut self, size: usize) { + self.simple.resize(size, TextureUses::UNINITIALIZED); + } +} + +/// Stores all texture state within a single usage scope. +#[derive(Debug)] +pub(crate) struct TextureUsageScope<A: HalApi> { + set: TextureStateSet, + metadata: ResourceMetadata<Texture<A>>, +} + +impl<A: HalApi> TextureUsageScope<A> { + pub fn new() -> Self { + Self { + set: TextureStateSet::new(), + + metadata: ResourceMetadata::new(), + } + } + + fn tracker_assert_in_bounds(&self, index: usize) { + self.metadata.tracker_assert_in_bounds(index); + + strict_assert!(index < self.set.simple.len()); + + strict_assert!(if self.metadata.contains(index) + && self.set.simple[index] == TextureUses::COMPLEX + { + self.set.complex.contains_key(&index) + } else { + true + }); + } + + /// Sets the size of all the vectors inside the tracker. + /// + /// Must be called with the highest possible Texture ID before + /// all unsafe functions are called. + pub fn set_size(&mut self, size: usize) { + self.set.set_size(size); + self.metadata.set_size(size); + } + + /// Drains all textures tracked. + pub(crate) fn drain_resources(&mut self) -> impl Iterator<Item = Arc<Texture<A>>> + '_ { + let resources = self.metadata.drain_resources(); + self.set.clear(); + resources.into_iter() + } + + /// Returns true if the tracker owns no resources. + /// + /// This is a O(n) operation. + pub(crate) fn is_empty(&self) -> bool { + self.metadata.is_empty() + } + + /// Merge the list of texture states in the given usage scope into this UsageScope. 
+ /// + /// If any of the resulting states is invalid, stops the merge and returns a usage + /// conflict with the details of the invalid state. + /// + /// If the given tracker uses IDs higher than the length of internal vectors, + /// the vectors will be extended. A call to set_size is not needed. + pub fn merge_usage_scope(&mut self, scope: &Self) -> Result<(), UsageConflict> { + let incoming_size = scope.set.simple.len(); + if incoming_size > self.set.simple.len() { + self.set_size(incoming_size); + } + + for index in scope.metadata.owned_indices() { + self.tracker_assert_in_bounds(index); + scope.tracker_assert_in_bounds(index); + + let texture_selector = + unsafe { &scope.metadata.get_resource_unchecked(index).full_range }; + unsafe { + insert_or_merge( + texture_selector, + &mut self.set, + &mut self.metadata, + index, + TextureStateProvider::TextureSet { set: &scope.set }, + ResourceMetadataProvider::Indirect { + metadata: &scope.metadata, + }, + )? + }; + } + + Ok(()) + } + + /// Merge the list of texture states in the given bind group into this usage scope. + /// + /// If any of the resulting states is invalid, stops the merge and returns a usage + /// conflict with the details of the invalid state. + /// + /// Because bind groups do not check if the union of all their states is valid, + /// this method is allowed to return Err on the first bind group bound. + /// + /// # Safety + /// + /// [`Self::set_size`] must be called with the maximum possible Buffer ID before this + /// method is called. + pub unsafe fn merge_bind_group( + &mut self, + bind_group: &TextureBindGroupState<A>, + ) -> Result<(), UsageConflict> { + let textures = bind_group.textures.lock(); + for t in &*textures { + unsafe { self.merge_single(&t.texture, t.selector.clone(), t.usage)? }; + } + + Ok(()) + } + + /// Merge a single state into the UsageScope. + /// + /// If the resulting state is invalid, returns a usage + /// conflict with the details of the invalid state. 
+ /// + /// # Safety + /// + /// Unlike other trackers whose merge_single is safe, this method is only + /// called where there is already other unsafe tracking functions active, + /// so we can prove this unsafe "for free". + /// + /// [`Self::set_size`] must be called with the maximum possible Buffer ID before this + /// method is called. + pub unsafe fn merge_single( + &mut self, + texture: &Arc<Texture<A>>, + selector: Option<TextureSelector>, + new_state: TextureUses, + ) -> Result<(), UsageConflict> { + let index = texture.as_info().id().unzip().0 as usize; + + self.tracker_assert_in_bounds(index); + + let texture_selector = &texture.full_range; + unsafe { + insert_or_merge( + texture_selector, + &mut self.set, + &mut self.metadata, + index, + TextureStateProvider::from_option(selector, new_state), + ResourceMetadataProvider::Direct { + resource: Cow::Borrowed(texture), + }, + )? + }; + + Ok(()) + } +} + +/// Stores all texture state within a command buffer or device. +pub(crate) struct TextureTracker<A: HalApi> { + start_set: TextureStateSet, + end_set: TextureStateSet, + + metadata: ResourceMetadata<Texture<A>>, + + temp: Vec<PendingTransition<TextureUses>>, + + _phantom: PhantomData<A>, +} + +impl<A: HalApi> ResourceTracker<Texture<A>> for TextureTracker<A> { + /// Try to remove the given resource from the tracker iff we have the last reference to the + /// resource and the epoch matches. + /// + /// Returns true if the resource was removed or if not existing in metadata. + /// + /// If the ID is higher than the length of internal vectors, + /// false will be returned. 
+ fn remove_abandoned(&mut self, id: TextureId) -> bool { + let index = id.unzip().0 as usize; + + if index > self.metadata.size() { + return false; + } + + self.tracker_assert_in_bounds(index); + + unsafe { + if self.metadata.contains_unchecked(index) { + let existing_ref_count = self.metadata.get_ref_count_unchecked(index); + //RefCount 2 means that resource is hold just by DeviceTracker and this suspected resource itself + //so it's already been released from user and so it's not inside Registry\Storage + if existing_ref_count <= 2 { + self.start_set.complex.remove(&index); + self.end_set.complex.remove(&index); + self.metadata.remove(index); + log::trace!("Texture {:?} is not tracked anymore", id,); + return true; + } else { + log::trace!( + "Texture {:?} is still referenced from {}", + id, + existing_ref_count + ); + return false; + } + } + } + true + } +} + +impl<A: HalApi> TextureTracker<A> { + pub fn new() -> Self { + Self { + start_set: TextureStateSet::new(), + end_set: TextureStateSet::new(), + + metadata: ResourceMetadata::new(), + + temp: Vec::new(), + + _phantom: PhantomData, + } + } + + fn tracker_assert_in_bounds(&self, index: usize) { + self.metadata.tracker_assert_in_bounds(index); + + strict_assert!(index < self.start_set.simple.len()); + strict_assert!(index < self.end_set.simple.len()); + + strict_assert!(if self.metadata.contains(index) + && self.start_set.simple[index] == TextureUses::COMPLEX + { + self.start_set.complex.contains_key(&index) + } else { + true + }); + strict_assert!(if self.metadata.contains(index) + && self.end_set.simple[index] == TextureUses::COMPLEX + { + self.end_set.complex.contains_key(&index) + } else { + true + }); + } + + /// Sets the size of all the vectors inside the tracker. + /// + /// Must be called with the highest possible Texture ID before + /// all unsafe functions are called. 
+ pub fn set_size(&mut self, size: usize) { + self.start_set.set_size(size); + self.end_set.set_size(size); + + self.metadata.set_size(size); + } + + /// Extend the vectors to let the given index be valid. + fn allow_index(&mut self, index: usize) { + if index >= self.start_set.simple.len() { + self.set_size(index + 1); + } + } + + /// Returns a list of all textures tracked. + pub fn used_resources(&self) -> impl Iterator<Item = Arc<Texture<A>>> + '_ { + self.metadata.owned_resources() + } + + /// Drain all currently pending transitions. + pub fn drain_transitions<'a>( + &'a mut self, + snatch_guard: &'a SnatchGuard<'a>, + ) -> (PendingTransitionList, Vec<Option<&'a TextureInner<A>>>) { + let mut textures = Vec::new(); + let transitions = self + .temp + .drain(..) + .map(|pending| { + let tex = unsafe { self.metadata.get_resource_unchecked(pending.id as _) }; + textures.push(tex.inner.get(snatch_guard)); + pending + }) + .collect(); + (transitions, textures) + } + + /// Inserts a single texture and a state into the resource tracker. + /// + /// If the resource already exists in the tracker, this will panic. + /// + /// If the ID is higher than the length of internal vectors, + /// the vectors will be extended. A call to set_size is not needed. + pub fn insert_single(&mut self, id: TextureId, resource: Arc<Texture<A>>, usage: TextureUses) { + let index = id.unzip().0 as usize; + + self.allow_index(index); + + self.tracker_assert_in_bounds(index); + + unsafe { + let currently_owned = self.metadata.contains_unchecked(index); + + if currently_owned { + panic!("Tried to insert texture already tracked"); + } + + insert( + None, + Some(&mut self.start_set), + &mut self.end_set, + &mut self.metadata, + index, + TextureStateProvider::KnownSingle { state: usage }, + None, + ResourceMetadataProvider::Direct { + resource: Cow::Owned(resource), + }, + ) + }; + } + + /// Sets the state of a single texture. 
+ /// + /// If a transition is needed to get the texture into the given state, that transition + /// is returned. + /// + /// If the ID is higher than the length of internal vectors, + /// the vectors will be extended. A call to set_size is not needed. + pub fn set_single( + &mut self, + texture: &Arc<Texture<A>>, + selector: TextureSelector, + new_state: TextureUses, + ) -> Option<Drain<'_, PendingTransition<TextureUses>>> { + let index = texture.as_info().id().unzip().0 as usize; + + self.allow_index(index); + + self.tracker_assert_in_bounds(index); + + unsafe { + insert_or_barrier_update( + &texture.full_range, + Some(&mut self.start_set), + &mut self.end_set, + &mut self.metadata, + index, + TextureStateProvider::Selector { + selector, + state: new_state, + }, + None, + ResourceMetadataProvider::Direct { + resource: Cow::Owned(texture.clone()), + }, + &mut self.temp, + ) + } + + Some(self.temp.drain(..)) + } + + /// Sets the given state for all texture in the given tracker. + /// + /// If a transition is needed to get the texture into the needed state, + /// those transitions are stored within the tracker. A subsequent + /// call to [`Self::drain_transitions`] is needed to get those transitions. + /// + /// If the ID is higher than the length of internal vectors, + /// the vectors will be extended. A call to set_size is not needed. 
+ pub fn set_from_tracker(&mut self, tracker: &Self) { + let incoming_size = tracker.start_set.simple.len(); + if incoming_size > self.start_set.simple.len() { + self.set_size(incoming_size); + } + + for index in tracker.metadata.owned_indices() { + self.tracker_assert_in_bounds(index); + tracker.tracker_assert_in_bounds(index); + unsafe { + let texture_selector = &tracker.metadata.get_resource_unchecked(index).full_range; + insert_or_barrier_update( + texture_selector, + Some(&mut self.start_set), + &mut self.end_set, + &mut self.metadata, + index, + TextureStateProvider::TextureSet { + set: &tracker.start_set, + }, + Some(TextureStateProvider::TextureSet { + set: &tracker.end_set, + }), + ResourceMetadataProvider::Indirect { + metadata: &tracker.metadata, + }, + &mut self.temp, + ); + } + } + } + + /// Sets the given state for all textures in the given UsageScope. + /// + /// If a transition is needed to get the textures into the needed state, + /// those transitions are stored within the tracker. A subsequent + /// call to [`Self::drain_transitions`] is needed to get those transitions. + /// + /// If the ID is higher than the length of internal vectors, + /// the vectors will be extended. A call to set_size is not needed. 
+ pub fn set_from_usage_scope(&mut self, scope: &TextureUsageScope<A>) { + let incoming_size = scope.set.simple.len(); + if incoming_size > self.start_set.simple.len() { + self.set_size(incoming_size); + } + + for index in scope.metadata.owned_indices() { + self.tracker_assert_in_bounds(index); + scope.tracker_assert_in_bounds(index); + unsafe { + let texture_selector = &scope.metadata.get_resource_unchecked(index).full_range; + insert_or_barrier_update( + texture_selector, + Some(&mut self.start_set), + &mut self.end_set, + &mut self.metadata, + index, + TextureStateProvider::TextureSet { set: &scope.set }, + None, + ResourceMetadataProvider::Indirect { + metadata: &scope.metadata, + }, + &mut self.temp, + ); + } + } + } + + /// Iterates through all textures in the given bind group and adopts + /// the state given for those textures in the UsageScope. It also + /// removes all touched textures from the usage scope. + /// + /// If a transition is needed to get the textures into the needed state, + /// those transitions are stored within the tracker. A subsequent + /// call to [`Self::drain_transitions`] is needed to get those transitions. + /// + /// This is a really funky method used by Compute Passes to generate + /// barriers after a call to dispatch without needing to iterate + /// over all elements in the usage scope. We use each the + /// bind group as a source of which IDs to look at. The bind groups + /// must have first been added to the usage scope. + /// + /// # Safety + /// + /// [`Self::set_size`] must be called with the maximum possible Buffer ID before this + /// method is called. 
+ pub unsafe fn set_and_remove_from_usage_scope_sparse( + &mut self, + scope: &mut TextureUsageScope<A>, + bind_group_state: &TextureBindGroupState<A>, + ) { + let incoming_size = scope.set.simple.len(); + if incoming_size > self.start_set.simple.len() { + self.set_size(incoming_size); + } + + let textures = bind_group_state.textures.lock(); + for t in textures.iter() { + let index = t.texture.as_info().id().unzip().0 as usize; + scope.tracker_assert_in_bounds(index); + + if unsafe { !scope.metadata.contains_unchecked(index) } { + continue; + } + let texture_selector = &t.texture.full_range; + unsafe { + insert_or_barrier_update( + texture_selector, + Some(&mut self.start_set), + &mut self.end_set, + &mut self.metadata, + index, + TextureStateProvider::TextureSet { set: &scope.set }, + None, + ResourceMetadataProvider::Indirect { + metadata: &scope.metadata, + }, + &mut self.temp, + ) + }; + + unsafe { scope.metadata.remove(index) }; + } + } + + /// Unconditionally removes the given resource from the tracker. + /// + /// Returns true if the resource was removed. + /// + /// If the ID is higher than the length of internal vectors, + /// false will be returned. + pub fn remove(&mut self, id: TextureId) -> bool { + let index = id.unzip().0 as usize; + + if index > self.metadata.size() { + return false; + } + + self.tracker_assert_in_bounds(index); + + unsafe { + if self.metadata.contains_unchecked(index) { + self.start_set.complex.remove(&index); + self.end_set.complex.remove(&index); + self.metadata.remove(index); + return true; + } + } + + false + } +} + +/// An iterator adapter that can store two different iterator types. 
+#[derive(Clone)] +enum EitherIter<L, R> { + Left(L), + Right(R), +} + +impl<L, R, D> Iterator for EitherIter<L, R> +where + L: Iterator<Item = D>, + R: Iterator<Item = D>, +{ + type Item = D; + + fn next(&mut self) -> Option<Self::Item> { + match *self { + EitherIter::Left(ref mut inner) => inner.next(), + EitherIter::Right(ref mut inner) => inner.next(), + } + } +} + +/// Container that signifies storing both different things +/// if there is a single state or many different states +/// involved in the operation. +#[derive(Debug, Clone)] +enum SingleOrManyStates<S, M> { + Single(S), + Many(M), +} + +/// A source of texture state. +#[derive(Clone)] +enum TextureStateProvider<'a> { + /// Comes directly from a single state. + KnownSingle { state: TextureUses }, + /// Comes from a selector and a single state. + Selector { + selector: TextureSelector, + state: TextureUses, + }, + /// Comes from another texture set. + TextureSet { set: &'a TextureStateSet }, +} +impl<'a> TextureStateProvider<'a> { + /// Convenience function turning `Option<Selector>` into this enum. + fn from_option(selector: Option<TextureSelector>, state: TextureUses) -> Self { + match selector { + Some(selector) => Self::Selector { selector, state }, + None => Self::KnownSingle { state }, + } + } + + /// Get the state provided by this. + /// + /// # Panics + /// + /// Panics if texture_selector is None and this uses a Selector source. + /// + /// # Safety + /// + /// - The index must be in bounds of the state set if this uses an TextureSet source. 
+ #[inline(always)] + unsafe fn get_state( + self, + texture_selector: Option<&TextureSelector>, + index: usize, + ) -> SingleOrManyStates< + TextureUses, + impl Iterator<Item = (TextureSelector, TextureUses)> + Clone + 'a, + > { + match self { + TextureStateProvider::KnownSingle { state } => SingleOrManyStates::Single(state), + TextureStateProvider::Selector { selector, state } => { + // We check if the selector given is actually for the full resource, + // and if it is we promote to a simple state. This allows upstream + // code to specify selectors willy nilly, and all that are really + // single states are promoted here. + if *texture_selector.unwrap() == selector { + SingleOrManyStates::Single(state) + } else { + SingleOrManyStates::Many(EitherIter::Left(iter::once((selector, state)))) + } + } + TextureStateProvider::TextureSet { set } => { + let new_state = *unsafe { set.simple.get_unchecked(index) }; + + if new_state == TextureUses::COMPLEX { + let new_complex = unsafe { set.complex.get(&index).unwrap_unchecked() }; + + SingleOrManyStates::Many(EitherIter::Right( + new_complex.to_selector_state_iter(), + )) + } else { + SingleOrManyStates::Single(new_state) + } + } + } + } +} + +/// Does an insertion operation if the index isn't tracked +/// in the current metadata, otherwise merges the given state +/// with the current state. If the merging would cause +/// a conflict, returns that usage conflict. +/// +/// # Safety +/// +/// Indexes must be valid indexes into all arrays passed in +/// to this function, either directly or via metadata or provider structs. 
+#[inline(always)] +unsafe fn insert_or_merge<A: HalApi>( + texture_selector: &TextureSelector, + current_state_set: &mut TextureStateSet, + resource_metadata: &mut ResourceMetadata<Texture<A>>, + index: usize, + state_provider: TextureStateProvider<'_>, + metadata_provider: ResourceMetadataProvider<'_, Texture<A>>, +) -> Result<(), UsageConflict> { + let currently_owned = unsafe { resource_metadata.contains_unchecked(index) }; + + if !currently_owned { + unsafe { + insert( + Some(texture_selector), + None, + current_state_set, + resource_metadata, + index, + state_provider, + None, + metadata_provider, + ) + }; + return Ok(()); + } + + unsafe { + merge( + texture_selector, + current_state_set, + index, + state_provider, + metadata_provider, + ) + } +} + +/// If the resource isn't tracked +/// - Inserts the given resource. +/// - Uses the `start_state_provider` to populate `start_states` +/// - Uses either `end_state_provider` or `start_state_provider` +/// to populate `current_states`. +/// If the resource is tracked +/// - Inserts barriers from the state in `current_states` +/// to the state provided by `start_state_provider`. +/// - Updates the `current_states` with either the state from +/// `end_state_provider` or `start_state_provider`. +/// +/// Any barriers are added to the barrier vector. +/// +/// # Safety +/// +/// Indexes must be valid indexes into all arrays passed in +/// to this function, either directly or via metadata or provider structs. 
+#[inline(always)] +unsafe fn insert_or_barrier_update<A: HalApi>( + texture_selector: &TextureSelector, + start_state: Option<&mut TextureStateSet>, + current_state_set: &mut TextureStateSet, + resource_metadata: &mut ResourceMetadata<Texture<A>>, + index: usize, + start_state_provider: TextureStateProvider<'_>, + end_state_provider: Option<TextureStateProvider<'_>>, + metadata_provider: ResourceMetadataProvider<'_, Texture<A>>, + barriers: &mut Vec<PendingTransition<TextureUses>>, +) { + let currently_owned = unsafe { resource_metadata.contains_unchecked(index) }; + + if !currently_owned { + unsafe { + insert( + Some(texture_selector), + start_state, + current_state_set, + resource_metadata, + index, + start_state_provider, + end_state_provider, + metadata_provider, + ) + }; + return; + } + + let update_state_provider = end_state_provider.unwrap_or_else(|| start_state_provider.clone()); + unsafe { + barrier( + texture_selector, + current_state_set, + index, + start_state_provider, + barriers, + ) + }; + + let start_state_set = start_state.unwrap(); + unsafe { + update( + texture_selector, + start_state_set, + current_state_set, + index, + update_state_provider, + ) + }; +} + +#[inline(always)] +unsafe fn insert<A: HalApi>( + texture_selector: Option<&TextureSelector>, + start_state: Option<&mut TextureStateSet>, + end_state: &mut TextureStateSet, + resource_metadata: &mut ResourceMetadata<Texture<A>>, + index: usize, + start_state_provider: TextureStateProvider<'_>, + end_state_provider: Option<TextureStateProvider<'_>>, + metadata_provider: ResourceMetadataProvider<'_, Texture<A>>, +) { + let start_layers = unsafe { start_state_provider.get_state(texture_selector, index) }; + match start_layers { + SingleOrManyStates::Single(state) => { + // This should only ever happen with a wgpu bug, but let's just double + // check that resource states don't have any conflicts. 
+ strict_assert_eq!(invalid_resource_state(state), false); + + log::trace!("\ttex {index}: insert start {state:?}"); + + if let Some(start_state) = start_state { + unsafe { *start_state.simple.get_unchecked_mut(index) = state }; + } + + // We only need to insert ourselves the end state if there is no end state provider. + if end_state_provider.is_none() { + unsafe { *end_state.simple.get_unchecked_mut(index) = state }; + } + } + SingleOrManyStates::Many(state_iter) => { + let full_range = texture_selector.unwrap().clone(); + + let complex = + unsafe { ComplexTextureState::from_selector_state_iter(full_range, state_iter) }; + + log::trace!("\ttex {index}: insert start {complex:?}"); + + if let Some(start_state) = start_state { + unsafe { *start_state.simple.get_unchecked_mut(index) = TextureUses::COMPLEX }; + start_state.complex.insert(index, complex.clone()); + } + + // We only need to insert ourselves the end state if there is no end state provider. + if end_state_provider.is_none() { + unsafe { *end_state.simple.get_unchecked_mut(index) = TextureUses::COMPLEX }; + end_state.complex.insert(index, complex); + } + } + } + + if let Some(end_state_provider) = end_state_provider { + match unsafe { end_state_provider.get_state(texture_selector, index) } { + SingleOrManyStates::Single(state) => { + // This should only ever happen with a wgpu bug, but let's just double + // check that resource states don't have any conflicts. + strict_assert_eq!(invalid_resource_state(state), false); + + log::trace!("\ttex {index}: insert end {state:?}"); + + // We only need to insert into the end, as there is guaranteed to be + // a start state provider. 
+ unsafe { *end_state.simple.get_unchecked_mut(index) = state }; + } + SingleOrManyStates::Many(state_iter) => { + let full_range = texture_selector.unwrap().clone(); + + let complex = unsafe { + ComplexTextureState::from_selector_state_iter(full_range, state_iter) + }; + + log::trace!("\ttex {index}: insert end {complex:?}"); + + // We only need to insert into the end, as there is guaranteed to be + // a start state provider. + unsafe { *end_state.simple.get_unchecked_mut(index) = TextureUses::COMPLEX }; + end_state.complex.insert(index, complex); + } + } + } + + unsafe { + let resource = metadata_provider.get_own(index); + resource_metadata.insert(index, resource); + } +} + +#[inline(always)] +unsafe fn merge<A: HalApi>( + texture_selector: &TextureSelector, + current_state_set: &mut TextureStateSet, + index: usize, + state_provider: TextureStateProvider<'_>, + metadata_provider: ResourceMetadataProvider<'_, Texture<A>>, +) -> Result<(), UsageConflict> { + let current_simple = unsafe { current_state_set.simple.get_unchecked_mut(index) }; + let current_state = if *current_simple == TextureUses::COMPLEX { + SingleOrManyStates::Many(unsafe { + current_state_set.complex.get_mut(&index).unwrap_unchecked() + }) + } else { + SingleOrManyStates::Single(current_simple) + }; + + let new_state = unsafe { state_provider.get_state(Some(texture_selector), index) }; + + match (current_state, new_state) { + (SingleOrManyStates::Single(current_simple), SingleOrManyStates::Single(new_simple)) => { + let merged_state = *current_simple | new_simple; + + log::trace!("\ttex {index}: merge simple {current_simple:?} + {new_simple:?}"); + + if invalid_resource_state(merged_state) { + return Err(UsageConflict::from_texture( + TextureId::zip( + index as _, + unsafe { metadata_provider.get_epoch(index) }, + A::VARIANT, + ), + texture_selector.clone(), + *current_simple, + new_simple, + )); + } + + *current_simple = merged_state; + } + (SingleOrManyStates::Single(current_simple), 
SingleOrManyStates::Many(new_many)) => { + // Because we are now demoting this simple state to a complex state, + // we actually need to make a whole new complex state for us to use + // as there wasn't one before. + let mut new_complex = unsafe { + ComplexTextureState::from_selector_state_iter( + texture_selector.clone(), + iter::once((texture_selector.clone(), *current_simple)), + ) + }; + + for (selector, new_state) in new_many { + let merged_state = *current_simple | new_state; + + log::trace!("\ttex {index}: merge {selector:?} {current_simple:?} + {new_state:?}"); + + if invalid_resource_state(merged_state) { + return Err(UsageConflict::from_texture( + TextureId::zip( + index as _, + unsafe { metadata_provider.get_epoch(index) }, + A::VARIANT, + ), + selector, + *current_simple, + new_state, + )); + } + + for mip in + &mut new_complex.mips[selector.mips.start as usize..selector.mips.end as usize] + { + for &mut (_, ref mut current_layer_state) in + mip.isolate(&selector.layers, TextureUses::UNKNOWN) + { + *current_layer_state = merged_state; + } + + mip.coalesce(); + } + } + + *current_simple = TextureUses::COMPLEX; + current_state_set.complex.insert(index, new_complex); + } + (SingleOrManyStates::Many(current_complex), SingleOrManyStates::Single(new_simple)) => { + for (mip_id, mip) in current_complex.mips.iter_mut().enumerate() { + let mip_id = mip_id as u32; + + for &mut (ref layers, ref mut current_layer_state) in mip.iter_mut() { + let merged_state = *current_layer_state | new_simple; + + // Once we remove unknown, this will never be empty, as + // simple states are never unknown. 
+ let merged_state = merged_state - TextureUses::UNKNOWN; + + log::trace!( + "\ttex {index}: merge mip {mip_id} layers {layers:?} \ + {current_layer_state:?} + {new_simple:?}" + ); + + if invalid_resource_state(merged_state) { + return Err(UsageConflict::from_texture( + TextureId::zip( + index as _, + unsafe { metadata_provider.get_epoch(index) }, + A::VARIANT, + ), + TextureSelector { + mips: mip_id..mip_id + 1, + layers: layers.clone(), + }, + *current_layer_state, + new_simple, + )); + } + + *current_layer_state = merged_state; + } + + mip.coalesce(); + } + } + (SingleOrManyStates::Many(current_complex), SingleOrManyStates::Many(new_many)) => { + for (selector, new_state) in new_many { + for mip_id in selector.mips { + strict_assert!((mip_id as usize) < current_complex.mips.len()); + + let mip = unsafe { current_complex.mips.get_unchecked_mut(mip_id as usize) }; + + for &mut (ref layers, ref mut current_layer_state) in + mip.isolate(&selector.layers, TextureUses::UNKNOWN) + { + let merged_state = *current_layer_state | new_state; + let merged_state = merged_state - TextureUses::UNKNOWN; + + if merged_state.is_empty() { + // We know nothing about this state, lets just move on. 
+ continue; + } + + log::trace!( + "\ttex {index}: merge mip {mip_id} layers {layers:?} \ + {current_layer_state:?} + {new_state:?}" + ); + + if invalid_resource_state(merged_state) { + return Err(UsageConflict::from_texture( + TextureId::zip( + index as _, + unsafe { metadata_provider.get_epoch(index) }, + A::VARIANT, + ), + TextureSelector { + mips: mip_id..mip_id + 1, + layers: layers.clone(), + }, + *current_layer_state, + new_state, + )); + } + *current_layer_state = merged_state; + } + + mip.coalesce(); + } + } + } + } + Ok(()) +} + +#[inline(always)] +unsafe fn barrier( + texture_selector: &TextureSelector, + current_state_set: &TextureStateSet, + index: usize, + state_provider: TextureStateProvider<'_>, + barriers: &mut Vec<PendingTransition<TextureUses>>, +) { + let current_simple = unsafe { *current_state_set.simple.get_unchecked(index) }; + let current_state = if current_simple == TextureUses::COMPLEX { + SingleOrManyStates::Many(unsafe { + current_state_set.complex.get(&index).unwrap_unchecked() + }) + } else { + SingleOrManyStates::Single(current_simple) + }; + + let new_state = unsafe { state_provider.get_state(Some(texture_selector), index) }; + + match (current_state, new_state) { + (SingleOrManyStates::Single(current_simple), SingleOrManyStates::Single(new_simple)) => { + if skip_barrier(current_simple, new_simple) { + return; + } + + log::trace!("\ttex {index}: transition simple {current_simple:?} -> {new_simple:?}"); + + barriers.push(PendingTransition { + id: index as _, + selector: texture_selector.clone(), + usage: current_simple..new_simple, + }); + } + (SingleOrManyStates::Single(current_simple), SingleOrManyStates::Many(new_many)) => { + for (selector, new_state) in new_many { + if new_state == TextureUses::UNKNOWN { + continue; + } + + if skip_barrier(current_simple, new_state) { + continue; + } + + log::trace!( + "\ttex {index}: transition {selector:?} {current_simple:?} -> {new_state:?}" + ); + + barriers.push(PendingTransition { + id: 
index as _, + selector, + usage: current_simple..new_state, + }); + } + } + (SingleOrManyStates::Many(current_complex), SingleOrManyStates::Single(new_simple)) => { + for (mip_id, mip) in current_complex.mips.iter().enumerate() { + let mip_id = mip_id as u32; + + for &(ref layers, current_layer_state) in mip.iter() { + if current_layer_state == TextureUses::UNKNOWN { + continue; + } + + if skip_barrier(current_layer_state, new_simple) { + continue; + } + + log::trace!( + "\ttex {index}: transition mip {mip_id} layers {layers:?} \ + {current_layer_state:?} -> {new_simple:?}" + ); + + barriers.push(PendingTransition { + id: index as _, + selector: TextureSelector { + mips: mip_id..mip_id + 1, + layers: layers.clone(), + }, + usage: current_layer_state..new_simple, + }); + } + } + } + (SingleOrManyStates::Many(current_complex), SingleOrManyStates::Many(new_many)) => { + for (selector, new_state) in new_many { + for mip_id in selector.mips { + strict_assert!((mip_id as usize) < current_complex.mips.len()); + + let mip = unsafe { current_complex.mips.get_unchecked(mip_id as usize) }; + + for (layers, current_layer_state) in mip.iter_filter(&selector.layers) { + if *current_layer_state == TextureUses::UNKNOWN + || new_state == TextureUses::UNKNOWN + { + continue; + } + + if skip_barrier(*current_layer_state, new_state) { + continue; + } + + log::trace!( + "\ttex {index}: transition mip {mip_id} layers {layers:?} \ + {current_layer_state:?} -> {new_state:?}" + ); + + barriers.push(PendingTransition { + id: index as _, + selector: TextureSelector { + mips: mip_id..mip_id + 1, + layers, + }, + usage: *current_layer_state..new_state, + }); + } + } + } + } + } +} + +#[allow(clippy::needless_option_as_deref)] // we use this for reborrowing Option<&mut T> +#[inline(always)] +unsafe fn update( + texture_selector: &TextureSelector, + start_state_set: &mut TextureStateSet, + current_state_set: &mut TextureStateSet, + index: usize, + state_provider: TextureStateProvider<'_>, +) { + 
let start_simple = unsafe { *start_state_set.simple.get_unchecked(index) }; + + // We only ever need to update the start state here if the state is complex. + // + // If the state is simple, the first insert to the tracker would cover it. + let mut start_complex = None; + if start_simple == TextureUses::COMPLEX { + start_complex = Some(unsafe { start_state_set.complex.get_mut(&index).unwrap_unchecked() }); + } + + let current_simple = unsafe { current_state_set.simple.get_unchecked_mut(index) }; + let current_state = if *current_simple == TextureUses::COMPLEX { + SingleOrManyStates::Many(unsafe { + current_state_set.complex.get_mut(&index).unwrap_unchecked() + }) + } else { + SingleOrManyStates::Single(current_simple) + }; + + let new_state = unsafe { state_provider.get_state(Some(texture_selector), index) }; + + match (current_state, new_state) { + (SingleOrManyStates::Single(current_simple), SingleOrManyStates::Single(new_simple)) => { + *current_simple = new_simple; + } + (SingleOrManyStates::Single(current_simple), SingleOrManyStates::Many(new_many)) => { + // Because we are now demoting this simple state to a complex state, + // we actually need to make a whole new complex state for us to use + // as there wasn't one before. 
+ let mut new_complex = unsafe { + ComplexTextureState::from_selector_state_iter( + texture_selector.clone(), + iter::once((texture_selector.clone(), *current_simple)), + ) + }; + + for (selector, mut new_state) in new_many { + if new_state == TextureUses::UNKNOWN { + new_state = *current_simple; + } + for mip in + &mut new_complex.mips[selector.mips.start as usize..selector.mips.end as usize] + { + for &mut (_, ref mut current_layer_state) in + mip.isolate(&selector.layers, TextureUses::UNKNOWN) + { + *current_layer_state = new_state; + } + + mip.coalesce(); + } + } + + *current_simple = TextureUses::COMPLEX; + current_state_set.complex.insert(index, new_complex); + } + (SingleOrManyStates::Many(current_complex), SingleOrManyStates::Single(new_single)) => { + for (mip_id, mip) in current_complex.mips.iter().enumerate() { + for &(ref layers, current_layer_state) in mip.iter() { + // If this state is unknown, that means that the start is _also_ unknown. + if current_layer_state == TextureUses::UNKNOWN { + if let Some(&mut ref mut start_complex) = start_complex { + strict_assert!(mip_id < start_complex.mips.len()); + + let start_mip = unsafe { start_complex.mips.get_unchecked_mut(mip_id) }; + + for &mut (_, ref mut current_start_state) in + start_mip.isolate(layers, TextureUses::UNKNOWN) + { + strict_assert_eq!(*current_start_state, TextureUses::UNKNOWN); + *current_start_state = new_single; + } + + start_mip.coalesce(); + } + } + } + } + + unsafe { *current_state_set.simple.get_unchecked_mut(index) = new_single }; + unsafe { current_state_set.complex.remove(&index).unwrap_unchecked() }; + } + (SingleOrManyStates::Many(current_complex), SingleOrManyStates::Many(new_many)) => { + for (selector, new_state) in new_many { + if new_state == TextureUses::UNKNOWN { + // We know nothing new + continue; + } + + for mip_id in selector.mips { + let mip_id = mip_id as usize; + strict_assert!(mip_id < current_complex.mips.len()); + + let mip = unsafe { 
current_complex.mips.get_unchecked_mut(mip_id) }; + + for &mut (ref layers, ref mut current_layer_state) in + mip.isolate(&selector.layers, TextureUses::UNKNOWN) + { + if *current_layer_state == TextureUses::UNKNOWN + && new_state != TextureUses::UNKNOWN + { + // We now know something about this subresource that + // we didn't before so we should go back and update + // the start state. + // + // We know we must have starter state be complex, + // otherwise we would know about this state. + strict_assert!(start_complex.is_some()); + + let start_complex = + unsafe { start_complex.as_deref_mut().unwrap_unchecked() }; + + strict_assert!(mip_id < start_complex.mips.len()); + + let start_mip = unsafe { start_complex.mips.get_unchecked_mut(mip_id) }; + + for &mut (_, ref mut current_start_state) in + start_mip.isolate(layers, TextureUses::UNKNOWN) + { + strict_assert_eq!(*current_start_state, TextureUses::UNKNOWN); + *current_start_state = new_state; + } + + start_mip.coalesce(); + } + + *current_layer_state = new_state; + } + + mip.coalesce(); + } + } + } + } +} diff --git a/third_party/rust/wgpu-core/src/validation.rs b/third_party/rust/wgpu-core/src/validation.rs new file mode 100644 index 0000000000..a0947ae83f --- /dev/null +++ b/third_party/rust/wgpu-core/src/validation.rs @@ -0,0 +1,1248 @@ +use crate::{device::bgl, FastHashMap, FastHashSet}; +use arrayvec::ArrayVec; +use std::{collections::hash_map::Entry, fmt}; +use thiserror::Error; +use wgt::{BindGroupLayoutEntry, BindingType}; + +#[derive(Debug)] +enum ResourceType { + Buffer { + size: wgt::BufferSize, + }, + Texture { + dim: naga::ImageDimension, + arrayed: bool, + class: naga::ImageClass, + }, + Sampler { + comparison: bool, + }, +} + +#[derive(Debug)] +struct Resource { + #[allow(unused)] + name: Option<String>, + bind: naga::ResourceBinding, + ty: ResourceType, + class: naga::AddressSpace, +} + +#[derive(Clone, Copy, Debug)] +enum NumericDimension { + Scalar, + Vector(naga::VectorSize), + 
Matrix(naga::VectorSize, naga::VectorSize), +} + +impl fmt::Display for NumericDimension { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + Self::Scalar => write!(f, ""), + Self::Vector(size) => write!(f, "x{}", size as u8), + Self::Matrix(columns, rows) => write!(f, "x{}{}", columns as u8, rows as u8), + } + } +} + +impl NumericDimension { + fn num_components(&self) -> u32 { + match *self { + Self::Scalar => 1, + Self::Vector(size) => size as u32, + Self::Matrix(w, h) => w as u32 * h as u32, + } + } +} + +#[derive(Clone, Copy, Debug)] +pub struct NumericType { + dim: NumericDimension, + scalar: naga::Scalar, +} + +impl fmt::Display for NumericType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{:?}{}{}", + self.scalar.kind, + self.scalar.width * 8, + self.dim + ) + } +} + +#[derive(Clone, Debug)] +pub struct InterfaceVar { + pub ty: NumericType, + interpolation: Option<naga::Interpolation>, + sampling: Option<naga::Sampling>, +} + +impl InterfaceVar { + pub fn vertex_attribute(format: wgt::VertexFormat) -> Self { + InterfaceVar { + ty: NumericType::from_vertex_format(format), + interpolation: None, + sampling: None, + } + } +} + +impl fmt::Display for InterfaceVar { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{} interpolated as {:?} with sampling {:?}", + self.ty, self.interpolation, self.sampling + ) + } +} + +#[derive(Debug)] +enum Varying { + Local { location: u32, iv: InterfaceVar }, + BuiltIn(naga::BuiltIn), +} + +#[allow(unused)] +#[derive(Debug)] +struct SpecializationConstant { + id: u32, + ty: NumericType, +} + +#[derive(Debug, Default)] +struct EntryPoint { + inputs: Vec<Varying>, + outputs: Vec<Varying>, + resources: Vec<naga::Handle<Resource>>, + #[allow(unused)] + spec_constants: Vec<SpecializationConstant>, + sampling_pairs: FastHashSet<(naga::Handle<Resource>, naga::Handle<Resource>)>, + workgroup_size: [u32; 3], + dual_source_blending: bool, +} + 
+#[derive(Debug)] +pub struct Interface { + limits: wgt::Limits, + features: wgt::Features, + resources: naga::Arena<Resource>, + entry_points: FastHashMap<(naga::ShaderStage, String), EntryPoint>, +} + +#[derive(Clone, Debug, Error)] +#[error("Buffer usage is {actual:?} which does not contain required usage {expected:?}")] +pub struct MissingBufferUsageError { + pub(crate) actual: wgt::BufferUsages, + pub(crate) expected: wgt::BufferUsages, +} + +/// Checks that the given buffer usage contains the required buffer usage, +/// returns an error otherwise. +pub fn check_buffer_usage( + actual: wgt::BufferUsages, + expected: wgt::BufferUsages, +) -> Result<(), MissingBufferUsageError> { + if !actual.contains(expected) { + Err(MissingBufferUsageError { actual, expected }) + } else { + Ok(()) + } +} + +#[derive(Clone, Debug, Error)] +#[error("Texture usage is {actual:?} which does not contain required usage {expected:?}")] +pub struct MissingTextureUsageError { + pub(crate) actual: wgt::TextureUsages, + pub(crate) expected: wgt::TextureUsages, +} + +/// Checks that the given texture usage contains the required texture usage, +/// returns an error otherwise. 
+pub fn check_texture_usage( + actual: wgt::TextureUsages, + expected: wgt::TextureUsages, +) -> Result<(), MissingTextureUsageError> { + if !actual.contains(expected) { + Err(MissingTextureUsageError { actual, expected }) + } else { + Ok(()) + } +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum BindingError { + #[error("Binding is missing from the pipeline layout")] + Missing, + #[error("Visibility flags don't include the shader stage")] + Invisible, + #[error("Type on the shader side does not match the pipeline binding")] + WrongType, + #[error("Storage class {binding:?} doesn't match the shader {shader:?}")] + WrongAddressSpace { + binding: naga::AddressSpace, + shader: naga::AddressSpace, + }, + #[error("Buffer structure size {0}, added to one element of an unbound array, if it's the last field, ended up greater than the given `min_binding_size`")] + WrongBufferSize(wgt::BufferSize), + #[error("View dimension {dim:?} (is array: {is_array}) doesn't match the binding {binding:?}")] + WrongTextureViewDimension { + dim: naga::ImageDimension, + is_array: bool, + binding: BindingType, + }, + #[error("Texture class {binding:?} doesn't match the shader {shader:?}")] + WrongTextureClass { + binding: naga::ImageClass, + shader: naga::ImageClass, + }, + #[error("Comparison flag doesn't match the shader")] + WrongSamplerComparison, + #[error("Derived bind group layout type is not consistent between stages")] + InconsistentlyDerivedType, + #[error("Texture format {0:?} is not supported for storage use")] + BadStorageFormat(wgt::TextureFormat), + #[error( + "Storage texture with access {0:?} doesn't have a matching supported `StorageTextureAccess`" + )] + UnsupportedTextureStorageAccess(naga::StorageAccess), +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum FilteringError { + #[error("Integer textures can't be sampled with a filtering sampler")] + Integer, + #[error("Non-filterable float textures can't be sampled with a filtering sampler")] + 
Float, +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum InputError { + #[error("Input is not provided by the earlier stage in the pipeline")] + Missing, + #[error("Input type is not compatible with the provided {0}")] + WrongType(NumericType), + #[error("Input interpolation doesn't match provided {0:?}")] + InterpolationMismatch(Option<naga::Interpolation>), + #[error("Input sampling doesn't match provided {0:?}")] + SamplingMismatch(Option<naga::Sampling>), +} + +/// Errors produced when validating a programmable stage of a pipeline. +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum StageError { + #[error("Shader module is invalid")] + InvalidModule, + #[error( + "Shader entry point's workgroup size {current:?} ({current_total} total invocations) must be less or equal to the per-dimension limit {limit:?} and the total invocation limit {total}" + )] + InvalidWorkgroupSize { + current: [u32; 3], + current_total: u32, + limit: [u32; 3], + total: u32, + }, + #[error("Shader uses {used} inter-stage components above the limit of {limit}")] + TooManyVaryings { used: u32, limit: u32 }, + #[error("Unable to find entry point '{0}'")] + MissingEntryPoint(String), + #[error("Shader global {0:?} is not available in the pipeline layout")] + Binding(naga::ResourceBinding, #[source] BindingError), + #[error("Unable to filter the texture ({texture:?}) by the sampler ({sampler:?})")] + Filtering { + texture: naga::ResourceBinding, + sampler: naga::ResourceBinding, + #[source] + error: FilteringError, + }, + #[error("Location[{location}] {var} is not provided by the previous stage outputs")] + Input { + location: wgt::ShaderLocation, + var: InterfaceVar, + #[source] + error: InputError, + }, + #[error("Location[{location}] is provided by the previous stage output but is not consumed as input by this stage.")] + InputNotConsumed { location: wgt::ShaderLocation }, +} + +fn map_storage_format_to_naga(format: wgt::TextureFormat) -> Option<naga::StorageFormat> 
{ + use naga::StorageFormat as Sf; + use wgt::TextureFormat as Tf; + + Some(match format { + Tf::R8Unorm => Sf::R8Unorm, + Tf::R8Snorm => Sf::R8Snorm, + Tf::R8Uint => Sf::R8Uint, + Tf::R8Sint => Sf::R8Sint, + + Tf::R16Uint => Sf::R16Uint, + Tf::R16Sint => Sf::R16Sint, + Tf::R16Float => Sf::R16Float, + Tf::Rg8Unorm => Sf::Rg8Unorm, + Tf::Rg8Snorm => Sf::Rg8Snorm, + Tf::Rg8Uint => Sf::Rg8Uint, + Tf::Rg8Sint => Sf::Rg8Sint, + + Tf::R32Uint => Sf::R32Uint, + Tf::R32Sint => Sf::R32Sint, + Tf::R32Float => Sf::R32Float, + Tf::Rg16Uint => Sf::Rg16Uint, + Tf::Rg16Sint => Sf::Rg16Sint, + Tf::Rg16Float => Sf::Rg16Float, + Tf::Rgba8Unorm => Sf::Rgba8Unorm, + Tf::Rgba8Snorm => Sf::Rgba8Snorm, + Tf::Rgba8Uint => Sf::Rgba8Uint, + Tf::Rgba8Sint => Sf::Rgba8Sint, + Tf::Bgra8Unorm => Sf::Bgra8Unorm, + + Tf::Rgb10a2Uint => Sf::Rgb10a2Uint, + Tf::Rgb10a2Unorm => Sf::Rgb10a2Unorm, + Tf::Rg11b10Float => Sf::Rg11b10Float, + + Tf::Rg32Uint => Sf::Rg32Uint, + Tf::Rg32Sint => Sf::Rg32Sint, + Tf::Rg32Float => Sf::Rg32Float, + Tf::Rgba16Uint => Sf::Rgba16Uint, + Tf::Rgba16Sint => Sf::Rgba16Sint, + Tf::Rgba16Float => Sf::Rgba16Float, + + Tf::Rgba32Uint => Sf::Rgba32Uint, + Tf::Rgba32Sint => Sf::Rgba32Sint, + Tf::Rgba32Float => Sf::Rgba32Float, + + Tf::R16Unorm => Sf::R16Unorm, + Tf::R16Snorm => Sf::R16Snorm, + Tf::Rg16Unorm => Sf::Rg16Unorm, + Tf::Rg16Snorm => Sf::Rg16Snorm, + Tf::Rgba16Unorm => Sf::Rgba16Unorm, + Tf::Rgba16Snorm => Sf::Rgba16Snorm, + + _ => return None, + }) +} + +fn map_storage_format_from_naga(format: naga::StorageFormat) -> wgt::TextureFormat { + use naga::StorageFormat as Sf; + use wgt::TextureFormat as Tf; + + match format { + Sf::R8Unorm => Tf::R8Unorm, + Sf::R8Snorm => Tf::R8Snorm, + Sf::R8Uint => Tf::R8Uint, + Sf::R8Sint => Tf::R8Sint, + + Sf::R16Uint => Tf::R16Uint, + Sf::R16Sint => Tf::R16Sint, + Sf::R16Float => Tf::R16Float, + Sf::Rg8Unorm => Tf::Rg8Unorm, + Sf::Rg8Snorm => Tf::Rg8Snorm, + Sf::Rg8Uint => Tf::Rg8Uint, + Sf::Rg8Sint => Tf::Rg8Sint, + + Sf::R32Uint => 
Tf::R32Uint, + Sf::R32Sint => Tf::R32Sint, + Sf::R32Float => Tf::R32Float, + Sf::Rg16Uint => Tf::Rg16Uint, + Sf::Rg16Sint => Tf::Rg16Sint, + Sf::Rg16Float => Tf::Rg16Float, + Sf::Rgba8Unorm => Tf::Rgba8Unorm, + Sf::Rgba8Snorm => Tf::Rgba8Snorm, + Sf::Rgba8Uint => Tf::Rgba8Uint, + Sf::Rgba8Sint => Tf::Rgba8Sint, + Sf::Bgra8Unorm => Tf::Bgra8Unorm, + + Sf::Rgb10a2Uint => Tf::Rgb10a2Uint, + Sf::Rgb10a2Unorm => Tf::Rgb10a2Unorm, + Sf::Rg11b10Float => Tf::Rg11b10Float, + + Sf::Rg32Uint => Tf::Rg32Uint, + Sf::Rg32Sint => Tf::Rg32Sint, + Sf::Rg32Float => Tf::Rg32Float, + Sf::Rgba16Uint => Tf::Rgba16Uint, + Sf::Rgba16Sint => Tf::Rgba16Sint, + Sf::Rgba16Float => Tf::Rgba16Float, + + Sf::Rgba32Uint => Tf::Rgba32Uint, + Sf::Rgba32Sint => Tf::Rgba32Sint, + Sf::Rgba32Float => Tf::Rgba32Float, + + Sf::R16Unorm => Tf::R16Unorm, + Sf::R16Snorm => Tf::R16Snorm, + Sf::Rg16Unorm => Tf::Rg16Unorm, + Sf::Rg16Snorm => Tf::Rg16Snorm, + Sf::Rgba16Unorm => Tf::Rgba16Unorm, + Sf::Rgba16Snorm => Tf::Rgba16Snorm, + } +} + +impl Resource { + fn check_binding_use(&self, entry: &BindGroupLayoutEntry) -> Result<(), BindingError> { + match self.ty { + ResourceType::Buffer { size } => { + let min_size = match entry.ty { + BindingType::Buffer { + ty, + has_dynamic_offset: _, + min_binding_size, + } => { + let class = match ty { + wgt::BufferBindingType::Uniform => naga::AddressSpace::Uniform, + wgt::BufferBindingType::Storage { read_only } => { + let mut naga_access = naga::StorageAccess::LOAD; + naga_access.set(naga::StorageAccess::STORE, !read_only); + naga::AddressSpace::Storage { + access: naga_access, + } + } + }; + if self.class != class { + return Err(BindingError::WrongAddressSpace { + binding: class, + shader: self.class, + }); + } + min_binding_size + } + _ => return Err(BindingError::WrongType), + }; + match min_size { + Some(non_zero) if non_zero < size => { + return Err(BindingError::WrongBufferSize(size)) + } + _ => (), + } + } + ResourceType::Sampler { comparison } => match entry.ty { 
+ BindingType::Sampler(ty) => { + if (ty == wgt::SamplerBindingType::Comparison) != comparison { + return Err(BindingError::WrongSamplerComparison); + } + } + _ => return Err(BindingError::WrongType), + }, + ResourceType::Texture { + dim, + arrayed, + class, + } => { + let view_dimension = match entry.ty { + BindingType::Texture { view_dimension, .. } + | BindingType::StorageTexture { view_dimension, .. } => view_dimension, + _ => { + return Err(BindingError::WrongTextureViewDimension { + dim, + is_array: false, + binding: entry.ty, + }) + } + }; + if arrayed { + match (dim, view_dimension) { + (naga::ImageDimension::D2, wgt::TextureViewDimension::D2Array) => (), + (naga::ImageDimension::Cube, wgt::TextureViewDimension::CubeArray) => (), + _ => { + return Err(BindingError::WrongTextureViewDimension { + dim, + is_array: true, + binding: entry.ty, + }) + } + } + } else { + match (dim, view_dimension) { + (naga::ImageDimension::D1, wgt::TextureViewDimension::D1) => (), + (naga::ImageDimension::D2, wgt::TextureViewDimension::D2) => (), + (naga::ImageDimension::D3, wgt::TextureViewDimension::D3) => (), + (naga::ImageDimension::Cube, wgt::TextureViewDimension::Cube) => (), + _ => { + return Err(BindingError::WrongTextureViewDimension { + dim, + is_array: false, + binding: entry.ty, + }) + } + } + } + let expected_class = match entry.ty { + BindingType::Texture { + sample_type, + view_dimension: _, + multisampled: multi, + } => match sample_type { + wgt::TextureSampleType::Float { .. 
} => naga::ImageClass::Sampled { + kind: naga::ScalarKind::Float, + multi, + }, + wgt::TextureSampleType::Sint => naga::ImageClass::Sampled { + kind: naga::ScalarKind::Sint, + multi, + }, + wgt::TextureSampleType::Uint => naga::ImageClass::Sampled { + kind: naga::ScalarKind::Uint, + multi, + }, + wgt::TextureSampleType::Depth => naga::ImageClass::Depth { multi }, + }, + BindingType::StorageTexture { + access, + format, + view_dimension: _, + } => { + let naga_format = map_storage_format_to_naga(format) + .ok_or(BindingError::BadStorageFormat(format))?; + let naga_access = match access { + wgt::StorageTextureAccess::ReadOnly => naga::StorageAccess::LOAD, + wgt::StorageTextureAccess::WriteOnly => naga::StorageAccess::STORE, + wgt::StorageTextureAccess::ReadWrite => naga::StorageAccess::all(), + }; + naga::ImageClass::Storage { + format: naga_format, + access: naga_access, + } + } + _ => return Err(BindingError::WrongType), + }; + if class != expected_class { + return Err(BindingError::WrongTextureClass { + binding: expected_class, + shader: class, + }); + } + } + }; + + Ok(()) + } + + fn derive_binding_type(&self) -> Result<BindingType, BindingError> { + Ok(match self.ty { + ResourceType::Buffer { size } => BindingType::Buffer { + ty: match self.class { + naga::AddressSpace::Uniform => wgt::BufferBindingType::Uniform, + naga::AddressSpace::Storage { access } => wgt::BufferBindingType::Storage { + read_only: access == naga::StorageAccess::LOAD, + }, + _ => return Err(BindingError::WrongType), + }, + has_dynamic_offset: false, + min_binding_size: Some(size), + }, + ResourceType::Sampler { comparison } => BindingType::Sampler(if comparison { + wgt::SamplerBindingType::Comparison + } else { + wgt::SamplerBindingType::Filtering + }), + ResourceType::Texture { + dim, + arrayed, + class, + } => { + let view_dimension = match dim { + naga::ImageDimension::D1 => wgt::TextureViewDimension::D1, + naga::ImageDimension::D2 if arrayed => wgt::TextureViewDimension::D2Array, + 
naga::ImageDimension::D2 => wgt::TextureViewDimension::D2, + naga::ImageDimension::D3 => wgt::TextureViewDimension::D3, + naga::ImageDimension::Cube if arrayed => wgt::TextureViewDimension::CubeArray, + naga::ImageDimension::Cube => wgt::TextureViewDimension::Cube, + }; + match class { + naga::ImageClass::Sampled { multi, kind } => BindingType::Texture { + sample_type: match kind { + naga::ScalarKind::Float => { + wgt::TextureSampleType::Float { filterable: true } + } + naga::ScalarKind::Sint => wgt::TextureSampleType::Sint, + naga::ScalarKind::Uint => wgt::TextureSampleType::Uint, + naga::ScalarKind::AbstractInt + | naga::ScalarKind::AbstractFloat + | naga::ScalarKind::Bool => unreachable!(), + }, + view_dimension, + multisampled: multi, + }, + naga::ImageClass::Depth { multi } => BindingType::Texture { + sample_type: wgt::TextureSampleType::Depth, + view_dimension, + multisampled: multi, + }, + naga::ImageClass::Storage { format, access } => BindingType::StorageTexture { + access: { + const LOAD_STORE: naga::StorageAccess = naga::StorageAccess::all(); + match access { + naga::StorageAccess::LOAD => wgt::StorageTextureAccess::ReadOnly, + naga::StorageAccess::STORE => wgt::StorageTextureAccess::WriteOnly, + LOAD_STORE => wgt::StorageTextureAccess::ReadWrite, + _ => unreachable!(), + } + }, + view_dimension, + format: { + let f = map_storage_format_from_naga(format); + let original = map_storage_format_to_naga(f) + .ok_or(BindingError::BadStorageFormat(f))?; + debug_assert_eq!(format, original); + f + }, + }, + } + } + }) + } +} + +impl NumericType { + fn from_vertex_format(format: wgt::VertexFormat) -> Self { + use naga::{Scalar, VectorSize as Vs}; + use wgt::VertexFormat as Vf; + + let (dim, scalar) = match format { + Vf::Uint32 => (NumericDimension::Scalar, Scalar::U32), + Vf::Uint8x2 | Vf::Uint16x2 | Vf::Uint32x2 => { + (NumericDimension::Vector(Vs::Bi), Scalar::U32) + } + Vf::Uint32x3 => (NumericDimension::Vector(Vs::Tri), Scalar::U32), + Vf::Uint8x4 | 
Vf::Uint16x4 | Vf::Uint32x4 => { + (NumericDimension::Vector(Vs::Quad), Scalar::U32) + } + Vf::Sint32 => (NumericDimension::Scalar, Scalar::I32), + Vf::Sint8x2 | Vf::Sint16x2 | Vf::Sint32x2 => { + (NumericDimension::Vector(Vs::Bi), Scalar::I32) + } + Vf::Sint32x3 => (NumericDimension::Vector(Vs::Tri), Scalar::I32), + Vf::Sint8x4 | Vf::Sint16x4 | Vf::Sint32x4 => { + (NumericDimension::Vector(Vs::Quad), Scalar::I32) + } + Vf::Float32 => (NumericDimension::Scalar, Scalar::F32), + Vf::Unorm8x2 + | Vf::Snorm8x2 + | Vf::Unorm16x2 + | Vf::Snorm16x2 + | Vf::Float16x2 + | Vf::Float32x2 => (NumericDimension::Vector(Vs::Bi), Scalar::F32), + Vf::Float32x3 => (NumericDimension::Vector(Vs::Tri), Scalar::F32), + Vf::Unorm8x4 + | Vf::Snorm8x4 + | Vf::Unorm16x4 + | Vf::Snorm16x4 + | Vf::Float16x4 + | Vf::Float32x4 => (NumericDimension::Vector(Vs::Quad), Scalar::F32), + Vf::Float64 => (NumericDimension::Scalar, Scalar::F64), + Vf::Float64x2 => (NumericDimension::Vector(Vs::Bi), Scalar::F64), + Vf::Float64x3 => (NumericDimension::Vector(Vs::Tri), Scalar::F64), + Vf::Float64x4 => (NumericDimension::Vector(Vs::Quad), Scalar::F64), + }; + + NumericType { + dim, + //Note: Shader always sees data as int, uint, or float. + // It doesn't know if the original is normalized in a tighter form. 
+ scalar, + } + } + + fn from_texture_format(format: wgt::TextureFormat) -> Self { + use naga::{Scalar, VectorSize as Vs}; + use wgt::TextureFormat as Tf; + + let (dim, scalar) = match format { + Tf::R8Unorm | Tf::R8Snorm | Tf::R16Float | Tf::R32Float => { + (NumericDimension::Scalar, Scalar::F32) + } + Tf::R8Uint | Tf::R16Uint | Tf::R32Uint => (NumericDimension::Scalar, Scalar::U32), + Tf::R8Sint | Tf::R16Sint | Tf::R32Sint => (NumericDimension::Scalar, Scalar::I32), + Tf::Rg8Unorm | Tf::Rg8Snorm | Tf::Rg16Float | Tf::Rg32Float => { + (NumericDimension::Vector(Vs::Bi), Scalar::F32) + } + Tf::Rg8Uint | Tf::Rg16Uint | Tf::Rg32Uint => { + (NumericDimension::Vector(Vs::Bi), Scalar::U32) + } + Tf::Rg8Sint | Tf::Rg16Sint | Tf::Rg32Sint => { + (NumericDimension::Vector(Vs::Bi), Scalar::I32) + } + Tf::R16Snorm | Tf::R16Unorm => (NumericDimension::Scalar, Scalar::F32), + Tf::Rg16Snorm | Tf::Rg16Unorm => (NumericDimension::Vector(Vs::Bi), Scalar::F32), + Tf::Rgba16Snorm | Tf::Rgba16Unorm => (NumericDimension::Vector(Vs::Quad), Scalar::F32), + Tf::Rgba8Unorm + | Tf::Rgba8UnormSrgb + | Tf::Rgba8Snorm + | Tf::Bgra8Unorm + | Tf::Bgra8UnormSrgb + | Tf::Rgb10a2Unorm + | Tf::Rgba16Float + | Tf::Rgba32Float => (NumericDimension::Vector(Vs::Quad), Scalar::F32), + Tf::Rgba8Uint | Tf::Rgba16Uint | Tf::Rgba32Uint | Tf::Rgb10a2Uint => { + (NumericDimension::Vector(Vs::Quad), Scalar::U32) + } + Tf::Rgba8Sint | Tf::Rgba16Sint | Tf::Rgba32Sint => { + (NumericDimension::Vector(Vs::Quad), Scalar::I32) + } + Tf::Rg11b10Float => (NumericDimension::Vector(Vs::Tri), Scalar::F32), + Tf::Stencil8 + | Tf::Depth16Unorm + | Tf::Depth32Float + | Tf::Depth32FloatStencil8 + | Tf::Depth24Plus + | Tf::Depth24PlusStencil8 => { + panic!("Unexpected depth format") + } + Tf::NV12 => panic!("Unexpected nv12 format"), + Tf::Rgb9e5Ufloat => (NumericDimension::Vector(Vs::Tri), Scalar::F32), + Tf::Bc1RgbaUnorm + | Tf::Bc1RgbaUnormSrgb + | Tf::Bc2RgbaUnorm + | Tf::Bc2RgbaUnormSrgb + | Tf::Bc3RgbaUnorm + | 
Tf::Bc3RgbaUnormSrgb + | Tf::Bc7RgbaUnorm + | Tf::Bc7RgbaUnormSrgb + | Tf::Etc2Rgb8A1Unorm + | Tf::Etc2Rgb8A1UnormSrgb + | Tf::Etc2Rgba8Unorm + | Tf::Etc2Rgba8UnormSrgb => (NumericDimension::Vector(Vs::Quad), Scalar::F32), + Tf::Bc4RUnorm | Tf::Bc4RSnorm | Tf::EacR11Unorm | Tf::EacR11Snorm => { + (NumericDimension::Scalar, Scalar::F32) + } + Tf::Bc5RgUnorm | Tf::Bc5RgSnorm | Tf::EacRg11Unorm | Tf::EacRg11Snorm => { + (NumericDimension::Vector(Vs::Bi), Scalar::F32) + } + Tf::Bc6hRgbUfloat | Tf::Bc6hRgbFloat | Tf::Etc2Rgb8Unorm | Tf::Etc2Rgb8UnormSrgb => { + (NumericDimension::Vector(Vs::Tri), Scalar::F32) + } + Tf::Astc { + block: _, + channel: _, + } => (NumericDimension::Vector(Vs::Quad), Scalar::F32), + }; + + NumericType { + dim, + //Note: Shader always sees data as int, uint, or float. + // It doesn't know if the original is normalized in a tighter form. + scalar, + } + } + + fn is_subtype_of(&self, other: &NumericType) -> bool { + if self.scalar.width > other.scalar.width { + return false; + } + if self.scalar.kind != other.scalar.kind { + return false; + } + match (self.dim, other.dim) { + (NumericDimension::Scalar, NumericDimension::Scalar) => true, + (NumericDimension::Scalar, NumericDimension::Vector(_)) => true, + (NumericDimension::Vector(s0), NumericDimension::Vector(s1)) => s0 <= s1, + (NumericDimension::Matrix(c0, r0), NumericDimension::Matrix(c1, r1)) => { + c0 == c1 && r0 == r1 + } + _ => false, + } + } + + fn is_compatible_with(&self, other: &NumericType) -> bool { + if self.scalar.kind != other.scalar.kind { + return false; + } + match (self.dim, other.dim) { + (NumericDimension::Scalar, NumericDimension::Scalar) => true, + (NumericDimension::Scalar, NumericDimension::Vector(_)) => true, + (NumericDimension::Vector(_), NumericDimension::Vector(_)) => true, + (NumericDimension::Matrix(..), NumericDimension::Matrix(..)) => true, + _ => false, + } + } +} + +/// Return true if the fragment `format` is covered by the provided `output`. 
/// On mismatch, the numeric type inferred from `format` is returned as the
/// error value so the caller can report what the attachment actually provides.
pub fn check_texture_format(
    format: wgt::TextureFormat,
    output: &NumericType,
) -> Result<(), NumericType> {
    let nt = NumericType::from_texture_format(format);
    if nt.is_subtype_of(output) {
        Ok(())
    } else {
        Err(nt)
    }
}

/// Where the bind group layout entries used during shader-interface
/// validation come from: either derived from the shader itself, or
/// supplied by the user and validated against the shader.
pub enum BindingLayoutSource<'a> {
    /// The binding layout is derived from the pipeline layout.
    ///
    /// This will be filled in by the shader binding validation, as it iterates the shader's interfaces.
    Derived(ArrayVec<bgl::EntryMap, { hal::MAX_BIND_GROUPS }>),
    /// The binding layout is provided by the user in BGLs.
    ///
    /// This will be validated against the shader's interfaces.
    Provided(ArrayVec<&'a bgl::EntryMap, { hal::MAX_BIND_GROUPS }>),
}

impl<'a> BindingLayoutSource<'a> {
    /// Creates an empty `Derived` layout set with one (default) entry map per
    /// bind group allowed by `limits`.
    ///
    /// NOTE(review): assumes `limits.max_bind_groups <= hal::MAX_BIND_GROUPS`;
    /// `ArrayVec::push` panics past capacity — presumably guaranteed by limit
    /// validation elsewhere, TODO confirm.
    pub fn new_derived(limits: &wgt::Limits) -> Self {
        let mut array = ArrayVec::new();
        for _ in 0..limits.max_bind_groups {
            array.push(Default::default());
        }
        BindingLayoutSource::Derived(array)
    }
}

/// Map from a shader `@location` to the interface variable produced or
/// consumed at that location by a pipeline stage.
pub type StageIo = FastHashMap<wgt::ShaderLocation, InterfaceVar>;

impl Interface {
    /// Flattens one entry-point argument or result into `list` as `Varying`s.
    ///
    /// Scalars, vectors and matrices become a single entry using `binding`;
    /// structs recurse into their members (each member carries its own
    /// binding). Any other type is skipped with a warning, and a missing
    /// binding on a numeric varying is logged as an error and skipped.
    fn populate(
        list: &mut Vec<Varying>,
        binding: Option<&naga::Binding>,
        ty: naga::Handle<naga::Type>,
        arena: &naga::UniqueArena<naga::Type>,
    ) {
        let numeric_ty = match arena[ty].inner {
            naga::TypeInner::Scalar(scalar) => NumericType {
                dim: NumericDimension::Scalar,
                scalar,
            },
            naga::TypeInner::Vector { size, scalar } => NumericType {
                dim: NumericDimension::Vector(size),
                scalar,
            },
            naga::TypeInner::Matrix {
                columns,
                rows,
                scalar,
            } => NumericType {
                dim: NumericDimension::Matrix(columns, rows),
                scalar,
            },
            naga::TypeInner::Struct { ref members, .. } => {
                for member in members {
                    Self::populate(list, member.binding.as_ref(), member.ty, arena);
                }
                return;
            }
            ref other => {
                //Note: technically this should be at least `log::error`, but
                // the reality is - every shader coming from `glslc` outputs an array
                // of clip distances and hits this path :(
                // So we lower it to `log::warn` to be less annoying.
                log::warn!("Unexpected varying type: {:?}", other);
                return;
            }
        };

        let varying = match binding {
            Some(&naga::Binding::Location {
                location,
                interpolation,
                sampling,
                .. // second_blend_source
            }) => Varying::Local {
                location,
                iv: InterfaceVar {
                    ty: numeric_ty,
                    interpolation,
                    sampling,
                },
            },
            Some(&naga::Binding::BuiltIn(built_in)) => Varying::BuiltIn(built_in),
            None => {
                log::error!("Missing binding for a varying");
                return;
            }
        };
        list.push(varying);
    }

    /// Builds an `Interface` from a validated naga `module`.
    ///
    /// Collects every global variable that has a resource binding into
    /// `resources` (classifying it as texture, sampler, or buffer), then
    /// records per-entry-point metadata: input/output varyings, the resources
    /// the entry point actually uses, texture/sampler sampling pairs, dual
    /// source blending usage, and workgroup size.
    pub fn new(
        module: &naga::Module,
        info: &naga::valid::ModuleInfo,
        limits: wgt::Limits,
        features: wgt::Features,
    ) -> Self {
        let mut resources = naga::Arena::new();
        // Maps naga's global-variable handles to handles in our `resources` arena.
        let mut resource_mapping = FastHashMap::default();
        for (var_handle, var) in module.global_variables.iter() {
            let bind = match var.binding {
                Some(ref br) => br.clone(),
                // Globals without a resource binding (e.g. private/workgroup vars)
                // are not part of the bind-group interface.
                _ => continue,
            };
            let naga_ty = &module.types[var.ty].inner;

            // Binding arrays are classified by their element type.
            let inner_ty = match *naga_ty {
                naga::TypeInner::BindingArray { base, .. } => &module.types[base].inner,
                ref ty => ty,
            };

            let ty = match *inner_ty {
                naga::TypeInner::Image {
                    dim,
                    arrayed,
                    class,
                } => ResourceType::Texture {
                    dim,
                    arrayed,
                    class,
                },
                naga::TypeInner::Sampler { comparison } => ResourceType::Sampler { comparison },
                // NOTE(review): for runtime-sized arrays this records the element
                // stride as the minimum binding size — presumably "at least one
                // element"; TODO confirm against min_binding_size validation.
                naga::TypeInner::Array { stride, .. } => ResourceType::Buffer {
                    size: wgt::BufferSize::new(stride as u64).unwrap(),
                },
                ref other => ResourceType::Buffer {
                    size: wgt::BufferSize::new(other.size(module.to_ctx()) as u64).unwrap(),
                },
            };
            let handle = resources.append(
                Resource {
                    name: var.name.clone(),
                    bind,
                    ty,
                    class: var.space,
                },
                Default::default(),
            );
            resource_mapping.insert(var_handle, handle);
        }

        let mut entry_points = FastHashMap::default();
        entry_points.reserve(module.entry_points.len());
        for (index, entry_point) in module.entry_points.iter().enumerate() {
            let info = info.get_entry_point(index);
            let mut ep = EntryPoint::default();
            for arg in entry_point.function.arguments.iter() {
                Self::populate(&mut ep.inputs, arg.binding.as_ref(), arg.ty, &module.types);
            }
            if let Some(ref result) = entry_point.function.result {
                Self::populate(
                    &mut ep.outputs,
                    result.binding.as_ref(),
                    result.ty,
                    &module.types,
                );
            }

            // Only bound globals that the analysis says are actually used count
            // as the entry point's resources.
            for (var_handle, var) in module.global_variables.iter() {
                let usage = info[var_handle];
                if !usage.is_empty() && var.binding.is_some() {
                    ep.resources.push(resource_mapping[&var_handle]);
                }
            }

            for key in info.sampling_set.iter() {
                ep.sampling_pairs
                    .insert((resource_mapping[&key.image], resource_mapping[&key.sampler]));
            }
            ep.dual_source_blending = info.dual_source_blending;
            ep.workgroup_size = entry_point.workgroup_size;

            // Keyed by (stage, name): names alone are not unique across stages.
            entry_points.insert((entry_point.stage, entry_point.name.clone()), ep);
        }

        Self {
            limits,
            features,
            resources,
            entry_points,
        }
    }

    /// Validates one pipeline stage against this shader interface.
    ///
    /// Checks, in order: that the entry point exists for this stage; that every
    /// resource it uses is present/visible in `layouts` (or, for derived
    /// layouts, fills the layout in); texture/sampler filtering compatibility
    /// (provided layouts only); compute workgroup-size limits; compatibility of
    /// the stage's inputs with `inputs` from the previous stage; that vertex
    /// outputs are consumed by the fragment stage (unless the
    /// `SHADER_UNUSED_VERTEX_OUTPUT` feature waives it); and the inter-stage
    /// component limit.
    ///
    /// Also accumulates required buffer binding sizes into
    /// `shader_binding_sizes`, and warns when an Equal/NotEqual depth compare
    /// is paired with a non-invariant `@builtin(position)` output.
    ///
    /// Returns this stage's outputs as the `StageIo` for the next stage.
    pub fn check_stage(
        &self,
        layouts: &mut BindingLayoutSource<'_>,
        shader_binding_sizes: &mut FastHashMap<naga::ResourceBinding, wgt::BufferSize>,
        entry_point_name: &str,
        stage_bit: wgt::ShaderStages,
        inputs: StageIo,
        compare_function: Option<wgt::CompareFunction>,
    ) -> Result<StageIo, StageError> {
        // Since a shader module can have multiple entry points with the same name,
        // we need to look for one with the right execution model.
        let shader_stage = match stage_bit {
            wgt::ShaderStages::VERTEX => naga::ShaderStage::Vertex,
            wgt::ShaderStages::FRAGMENT => naga::ShaderStage::Fragment,
            wgt::ShaderStages::COMPUTE => naga::ShaderStage::Compute,
            // Callers pass exactly one stage bit.
            _ => unreachable!(),
        };
        let pair = (shader_stage, entry_point_name.to_string());
        let entry_point = self
            .entry_points
            .get(&pair)
            .ok_or(StageError::MissingEntryPoint(pair.1))?;

        // check resources visibility
        for &handle in entry_point.resources.iter() {
            let res = &self.resources[handle];
            // Labeled block so each failure path can bail out with its error
            // while sharing the single StageError::Binding wrapper below.
            let result = 'err: {
                match layouts {
                    BindingLayoutSource::Provided(layouts) => {
                        // update the required binding size for this buffer
                        if let ResourceType::Buffer { size } = res.ty {
                            match shader_binding_sizes.entry(res.bind.clone()) {
                                Entry::Occupied(e) => {
                                    // Keep the largest size any stage requires.
                                    *e.into_mut() = size.max(*e.get());
                                }
                                Entry::Vacant(e) => {
                                    e.insert(size);
                                }
                            }
                        }

                        let Some(map) = layouts.get(res.bind.group as usize) else {
                            break 'err Err(BindingError::Missing);
                        };

                        let Some(entry) = map.get(res.bind.binding) else {
                            break 'err Err(BindingError::Missing);
                        };

                        if !entry.visibility.contains(stage_bit) {
                            break 'err Err(BindingError::Invisible);
                        }

                        res.check_binding_use(entry)
                    }
                    BindingLayoutSource::Derived(layouts) => {
                        let Some(map) = layouts.get_mut(res.bind.group as usize) else {
                            break 'err Err(BindingError::Missing);
                        };

                        let ty = match res.derive_binding_type() {
                            Ok(ty) => ty,
                            Err(error) => break 'err Err(error),
                        };

                        match map.entry(res.bind.binding) {
                            // A previous stage derived a different type for the
                            // same binding slot — the stages disagree.
                            indexmap::map::Entry::Occupied(e) if e.get().ty != ty => {
                                break 'err Err(BindingError::InconsistentlyDerivedType)
                            }
                            indexmap::map::Entry::Occupied(e) => {
                                e.into_mut().visibility |= stage_bit;
                            }
                            indexmap::map::Entry::Vacant(e) => {
                                e.insert(BindGroupLayoutEntry {
                                    binding: res.bind.binding,
                                    ty,
                                    visibility: stage_bit,
                                    count: None,
                                });
                            }
                        }
                        Ok(())
                    }
                }
            };
            if let Err(error) = result {
                return Err(StageError::Binding(res.bind.clone(), error));
            }
        }

        // Check the compatibility between textures and samplers
        //
        // We only need to do this if the binding layout is provided by the user, as derived
        // layouts will inherently be correctly tagged.
        if let BindingLayoutSource::Provided(layouts) = layouts {
            for &(texture_handle, sampler_handle) in entry_point.sampling_pairs.iter() {
                let texture_bind = &self.resources[texture_handle].bind;
                let sampler_bind = &self.resources[sampler_handle].bind;
                // Indexing/unwrap are safe here: both bindings were found and
                // validated in the resource-visibility loop above.
                let texture_layout = layouts[texture_bind.group as usize]
                    .get(texture_bind.binding)
                    .unwrap();
                let sampler_layout = layouts[sampler_bind.group as usize]
                    .get(sampler_bind.binding)
                    .unwrap();
                assert!(texture_layout.visibility.contains(stage_bit));
                assert!(sampler_layout.visibility.contains(stage_bit));

                let sampler_filtering = matches!(
                    sampler_layout.ty,
                    wgt::BindingType::Sampler(wgt::SamplerBindingType::Filtering)
                );
                let texture_sample_type = match texture_layout.ty {
                    BindingType::Texture { sample_type, .. } => sample_type,
                    _ => unreachable!(),
                };

                // A filtering sampler may only be paired with a filterable
                // float texture.
                let error = match (sampler_filtering, texture_sample_type) {
                    (true, wgt::TextureSampleType::Float { filterable: false }) => {
                        Some(FilteringError::Float)
                    }
                    (true, wgt::TextureSampleType::Sint) => Some(FilteringError::Integer),
                    (true, wgt::TextureSampleType::Uint) => Some(FilteringError::Integer),
                    _ => None,
                };

                if let Some(error) = error {
                    return Err(StageError::Filtering {
                        texture: texture_bind.clone(),
                        sampler: sampler_bind.clone(),
                        error,
                    });
                }
            }
        }

        // check workgroup size limits
        if shader_stage == naga::ShaderStage::Compute {
            let max_workgroup_size_limits = [
                self.limits.max_compute_workgroup_size_x,
                self.limits.max_compute_workgroup_size_y,
                self.limits.max_compute_workgroup_size_z,
            ];
            let total_invocations = entry_point.workgroup_size.iter().product::<u32>();

            // Reject zero-sized dimensions, a total over the invocation limit,
            // or any single axis over its per-axis limit.
            if entry_point.workgroup_size.iter().any(|&s| s == 0)
                || total_invocations > self.limits.max_compute_invocations_per_workgroup
                || entry_point.workgroup_size[0] > max_workgroup_size_limits[0]
                || entry_point.workgroup_size[1] > max_workgroup_size_limits[1]
                || entry_point.workgroup_size[2] > max_workgroup_size_limits[2]
            {
                return Err(StageError::InvalidWorkgroupSize {
                    current: entry_point.workgroup_size,
                    current_total: total_invocations,
                    limit: max_workgroup_size_limits,
                    total: self.limits.max_compute_invocations_per_workgroup,
                });
            }
        }

        let mut inter_stage_components = 0;

        // check inputs compatibility
        for input in entry_point.inputs.iter() {
            match *input {
                Varying::Local { location, ref iv } => {
                    let result =
                        inputs
                            .get(&location)
                            .ok_or(InputError::Missing)
                            .and_then(|provided| {
                                let (compatible, num_components) = match shader_stage {
                                    // For vertex attributes, there are defaults filled out
                                    // by the driver if data is not provided.
                                    naga::ShaderStage::Vertex => {
                                        // vertex inputs don't count towards inter-stage
                                        (iv.ty.is_compatible_with(&provided.ty), 0)
                                    }
                                    naga::ShaderStage::Fragment => {
                                        if iv.interpolation != provided.interpolation {
                                            return Err(InputError::InterpolationMismatch(
                                                provided.interpolation,
                                            ));
                                        }
                                        if iv.sampling != provided.sampling {
                                            return Err(InputError::SamplingMismatch(
                                                provided.sampling,
                                            ));
                                        }
                                        (
                                            iv.ty.is_subtype_of(&provided.ty),
                                            iv.ty.dim.num_components(),
                                        )
                                    }
                                    // Compute entry points have no location inputs.
                                    naga::ShaderStage::Compute => (false, 0),
                                };
                                if compatible {
                                    Ok(num_components)
                                } else {
                                    Err(InputError::WrongType(provided.ty))
                                }
                            });
                    match result {
                        Ok(num_components) => {
                            inter_stage_components += num_components;
                        }
                        Err(error) => {
                            return Err(StageError::Input {
                                location,
                                var: iv.clone(),
                                error,
                            })
                        }
                    }
                }
                Varying::BuiltIn(_) => {}
            }
        }

        // Check all vertex outputs and make sure the fragment shader consumes them.
        // This requirement is removed if the `SHADER_UNUSED_VERTEX_OUTPUT` feature is enabled.
        if shader_stage == naga::ShaderStage::Fragment
            && !self
                .features
                .contains(wgt::Features::SHADER_UNUSED_VERTEX_OUTPUT)
        {
            for &index in inputs.keys() {
                // This is a linear scan, but the count should be low enough
                // that this should be fine.
                let found = entry_point.inputs.iter().any(|v| match *v {
                    Varying::Local { location, .. } => location == index,
                    Varying::BuiltIn(_) => false,
                });

                if !found {
                    return Err(StageError::InputNotConsumed { location: index });
                }
            }
        }

        if shader_stage == naga::ShaderStage::Vertex {
            for output in entry_point.outputs.iter() {
                //TODO: count builtins towards the limit?
                inter_stage_components += match *output {
                    Varying::Local { ref iv, .. } => iv.ty.dim.num_components(),
                    Varying::BuiltIn(_) => 0,
                };

                if let Some(
                    cmp @ wgt::CompareFunction::Equal | cmp @ wgt::CompareFunction::NotEqual,
                ) = compare_function
                {
                    if let Varying::BuiltIn(naga::BuiltIn::Position { invariant: false }) = *output
                    {
                        log::warn!(
                            "Vertex shader with entry point {entry_point_name} outputs a @builtin(position) without the @invariant \
                            attribute and is used in a pipeline with {cmp:?}. On some machines, this can cause bad artifacting as {cmp:?} assumes \
                            the values output from the vertex shader exactly match the value in the depth buffer. The @invariant attribute on the \
                            @builtin(position) vertex output ensures that the exact same pixel depths are used every render."
                        );
                    }
                }
            }
        }

        if inter_stage_components > self.limits.max_inter_stage_shader_components {
            return Err(StageError::TooManyVaryings {
                used: inter_stage_components,
                limit: self.limits.max_inter_stage_shader_components,
            });
        }

        // This stage's location outputs become the next stage's StageIo.
        let outputs = entry_point
            .outputs
            .iter()
            .filter_map(|output| match *output {
                Varying::Local { location, ref iv } => Some((location, iv.clone())),
                Varying::BuiltIn(_) => None,
            })
            .collect();
        Ok(outputs)
    }

    /// Returns whether the fragment entry point named `entry_point_name` uses
    /// dual-source blending.
    ///
    /// # Errors
    /// [`StageError::MissingEntryPoint`] if no fragment entry point with that
    /// name exists in the module.
    pub fn fragment_uses_dual_source_blending(
        &self,
        entry_point_name: &str,
    ) -> Result<bool, StageError> {
        let pair = (naga::ShaderStage::Fragment, entry_point_name.to_string());
        self.entry_points
            .get(&pair)
            .ok_or(StageError::MissingEntryPoint(pair.1))
            .map(|ep| ep.dual_source_blending)
    }
}