diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /third_party/rust/wgpu-core | |
parent | Initial commit. (diff) | |
download | firefox-43a97878ce14b72f0981164f87f2e35e14151312.tar.xz firefox-43a97878ce14b72f0981164f87f2e35e14151312.zip |
Adding upstream version 110.0.1. (refs: upstream/110.0.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/wgpu-core')
38 files changed, 28519 insertions, 0 deletions
diff --git a/third_party/rust/wgpu-core/.cargo-checksum.json b/third_party/rust/wgpu-core/.cargo-checksum.json new file mode 100644 index 0000000000..b13287ab37 --- /dev/null +++ b/third_party/rust/wgpu-core/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"70b60e663e9ad3847924f1a5aa12e88b634aae3a1dff92b691c1c7586e520b75","LICENSE.APACHE":"a6cba85bc92e0cff7a450b1d873c0eaa2e9fc96bf472df0247a26bec77bf3ff9","LICENSE.MIT":"c7fea58d1cfe49634cd92e54fc10a9d871f4b275321a4cd8c09e449122caaeb4","src/binding_model.rs":"3b9180c91a30ffaffaddaa734ee99e7a0e68b1da1bf3963eae447fa69cfae2ef","src/command/bind.rs":"88ca2bedfcd8392efc960f39ca8506024af495dcf1852cf16ae28fbadbdf748d","src/command/bundle.rs":"9420e25cf05f28170cce81df0d20e67f4dbe5b54588c0541e01008dde4bf7b45","src/command/clear.rs":"cc118937b7a0b05225a2417db2f11bb95a095b0ca63561db092a8dfab35d9158","src/command/compute.rs":"f2a08528e39e3cc7049757e9d63d591c8adc9bffa3af66d57eee0e6ab7bc3015","src/command/draw.rs":"ac3541fb46435dd1687b24b24d9896104adde8f662111f232b7b4e8e34a869ea","src/command/memory_init.rs":"347117a84f35927de91e7ab4cc69d19b5b4507a942596c0c725f27b50c428b1e","src/command/mod.rs":"31623431b6dc1c061dedcb838949d4c81d4e95d5516be47524d53b3d2faef3e6","src/command/query.rs":"2c107b22806a003808da2f886ecc74764b016851b7a6cec8caf21385a2e25dee","src/command/render.rs":"d62d2ec5344312f78714d294ffc53686f993accd84925af29700073dc1d1b117","src/command/transfer.rs":"5321de8440653e3c69970816d3df8e628ae31fc7c5a03125f1cc8ed244e13f68","src/conv.rs":"c4506a7f715461d67c2bcfef002f13d3e75d10b2e4f18b0ab9ea8d5bdeb7bc61","src/device/life.rs":"5f58817365cf8b0a76cab83fdb50b3ee69a15b2bd868bc3702c7c29f3189326c","src/device/mod.rs":"5019dcbdda406700b3e7121055eba7936b8cd403517e5711fabaf1421f8fd6a6","src/device/queue.rs":"883a0bd1c8d3cddc46393e9eeb76bedce9ce48d1467a767d9839db7934252ad9","src/device/trace.rs":"9c03f5ec06cae37f294179a2285e2af7a5497db8a5572bf1dbe50136943d69be","src/error.rs":"7e914b25a2b9578672eb4a8b52c778ad947e12e0f24b44a8453c
da48a249b39c","src/hub.rs":"1be76bb6c1cae694e891d6798789ed9748535b42b042f44cad09d939dbc802c1","src/id.rs":"608dfbdc118905638762d0a16e4f69b0ecae16bbe20accead2d7d9b7e7281dda","src/init_tracker/buffer.rs":"a0ebf54a1e6d269c7b4aa0ac7bb8b04fd2cea3221a1d058ff33cb683b2aea3e9","src/init_tracker/mod.rs":"0867f79f83555390d0982d1dc6dcf0d4340e10cb89aa633d3c3ecc45deb3c78c","src/init_tracker/texture.rs":"11de3b1968829aec1f165930d9e4f946d6c02c7994ce32925a62f883820aa1e3","src/instance.rs":"12f4204755c67eaf82013142c4e6ff8e3ad4e74adf07c00f7d20caa1fd2a09e6","src/lib.rs":"206c07ef386628a9b8e0f8ad45415c9eb9b55418c3636cc9e6aba63eb6ed5978","src/pipeline.rs":"38e9fff13e7e152378e95dc13822dab62542d4b172f006a34d84eebb70bed0f7","src/present.rs":"7c00b8bc3475d9a100adf5c9e9d0af823aa14b626e923f87ec988022bd4766d1","src/resource.rs":"c23933592838688d86986a0f6c449df6eaefa8b770e10c512d849a7a497b92a7","src/track/buffer.rs":"0b7a8c6acfe55e3f45dd1e7ef1b3ee8bd4844f61bbabf62aa4f0c391b822803c","src/track/metadata.rs":"9565f700661e81fd534a751a0e3d26022238ad8784cc868333da7030b3571473","src/track/mod.rs":"995a8cd73499a26b593d0a419630681ea7942cec8db132ae8a13baf69fe13cdc","src/track/range.rs":"5bbfed6e103b3234d9de8e42057022da6d628c2cc1db6bb51b88f87f2d8adf8b","src/track/stateless.rs":"34942ecdf81b05b75892c891c87e065329870459ff8edfca1f99ec9533c334aa","src/track/texture.rs":"7d38b2f4e0cdb3e56ce1e777d8bca4d73ef40e21c56e11fedd69dc27e789256d","src/validation.rs":"03d3a99410c5f6ec937358206ea76eafff31823ebbe443ab71facf8a8b6a95e8"},"package":null}
\ No newline at end of file diff --git a/third_party/rust/wgpu-core/Cargo.toml b/third_party/rust/wgpu-core/Cargo.toml new file mode 100644 index 0000000000..bc2c212fc7 --- /dev/null +++ b/third_party/rust/wgpu-core/Cargo.toml @@ -0,0 +1,84 @@ +[package] +name = "wgpu-core" +version = "0.14.0" +authors = ["wgpu developers"] +edition = "2021" +description = "WebGPU core logic on wgpu-hal" +homepage = "https://wgpu.rs/" +repository = "https://github.com/gfx-rs/wgpu" +keywords = ["graphics"] +license = "MIT OR Apache-2.0" + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] + +[lib] + +[features] +default = [] + +# Backends, passed through to wgpu-hal +metal = ["hal/metal"] +vulkan = ["hal/vulkan"] +gles = ["hal/gles"] +dx11 = ["hal/dx11"] +dx12 = ["hal/dx12"] + +# Support the Renderdoc graphics debugger: +# https://renderdoc.org/ +renderdoc = ["hal/renderdoc"] + +# Compile for the Emscripten POSIX-in-a-web-page emulation environment. +emscripten = ["hal/emscripten"] + +# Apply run-time checks, even in release builds. These are in addition +# to the validation carried out at public APIs in all builds. +strict_asserts = ["wgt/strict_asserts"] +angle = ["hal/gles"] +# Enable API tracing +trace = ["ron", "serde", "wgt/trace", "arrayvec/serde", "naga/serialize"] +# Enable API replaying +replay = ["serde", "wgt/replay", "arrayvec/serde", "naga/deserialize"] +# Enable serializable compute/render passes, and bundle encoders. +serial-pass = ["serde", "wgt/serde", "arrayvec/serde"] +id32 = [] +# Enable `ShaderModuleSource::Wgsl` +wgsl = ["naga/wgsl-in"] +vulkan-portability = ["hal/vulkan"] + +# Features that are intended to work on all platforms. 
+portable_features = ["gles", "strict_asserts", "trace", "replay", "serial-pass", "id32", "wgsl"] + +[dependencies] +arrayvec = "0.7" +bitflags = "1" +bit-vec = "0.6" +codespan-reporting = "0.11" +fxhash = "0.2.1" +log = "0.4" +# parking_lot 0.12 switches from `winapi` to `windows`; permit either +parking_lot = ">=0.11,<0.13" +profiling = { version = "1", default-features = false } +raw-window-handle = { version = "0.5", optional = true } +ron = { version = "0.8", optional = true } +serde = { version = "1", features = ["serde_derive"], optional = true } +smallvec = "1" +thiserror = "1" + +[dependencies.naga] +git = "https://github.com/gfx-rs/naga" +rev = "e7fc8e6" +version = "0.10" +features = ["clone", "span", "validate"] + +[dependencies.wgt] +package = "wgpu-types" +path = "../wgpu-types" + +[dependencies.hal] +package = "wgpu-hal" +path = "../wgpu-hal" + +[target.'cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))'.dependencies] +web-sys = { version = "0.3.60", features = ["HtmlCanvasElement", "OffscreenCanvas"] } diff --git a/third_party/rust/wgpu-core/LICENSE.APACHE b/third_party/rust/wgpu-core/LICENSE.APACHE new file mode 100644 index 0000000000..d9a10c0d8e --- /dev/null +++ b/third_party/rust/wgpu-core/LICENSE.APACHE @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS diff --git a/third_party/rust/wgpu-core/LICENSE.MIT b/third_party/rust/wgpu-core/LICENSE.MIT new file mode 100644 index 0000000000..4699691b8e --- /dev/null +++ b/third_party/rust/wgpu-core/LICENSE.MIT @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 The gfx-rs developers + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/third_party/rust/wgpu-core/src/binding_model.rs b/third_party/rust/wgpu-core/src/binding_model.rs new file mode 100644 index 0000000000..d64d563b25 --- /dev/null +++ b/third_party/rust/wgpu-core/src/binding_model.rs @@ -0,0 +1,845 @@ +use crate::{ + device::{DeviceError, MissingDownlevelFlags, MissingFeatures, SHADER_STAGE_COUNT}, + error::{ErrorFormatter, PrettyError}, + hub::{HalApi, Resource}, + id::{BindGroupLayoutId, BufferId, DeviceId, SamplerId, TextureId, TextureViewId, Valid}, + init_tracker::{BufferInitTrackerAction, TextureInitTrackerAction}, + track::{BindGroupStates, UsageConflict}, + validation::{MissingBufferUsageError, MissingTextureUsageError}, + FastHashMap, Label, LifeGuard, MultiRefCount, Stored, +}; + +use arrayvec::ArrayVec; + +#[cfg(feature = "replay")] +use serde::Deserialize; +#[cfg(feature = "trace")] +use serde::Serialize; + +use std::{borrow::Cow, ops::Range}; + +use thiserror::Error; + +#[derive(Clone, Debug, Error)] +pub enum BindGroupLayoutEntryError { + #[error("cube dimension is not expected for texture storage")] + StorageTextureCube, + #[error("Read-write and read-only storage textures are not allowed by webgpu, they require the native only feature TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES")] + StorageTextureReadWrite, + #[error("arrays of bindings unsupported for this type of binding")] + ArrayUnsupported, + #[error(transparent)] + MissingFeatures(#[from] MissingFeatures), + #[error(transparent)] + MissingDownlevelFlags(#[from] MissingDownlevelFlags), +} + +#[derive(Clone, Debug, Error)] +pub enum CreateBindGroupLayoutError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("conflicting binding at index {0}")] + ConflictBinding(u32), + #[error("binding {binding} entry is invalid")] + Entry { + binding: u32, + #[source] + error: BindGroupLayoutEntryError, + }, + #[error(transparent)] + TooManyBindings(BindingTypeMaxCountError), + #[error("Binding index {binding} is greater than the maximum index 
{maximum}")] + InvalidBindingIndex { binding: u32, maximum: u32 }, + #[error("Invalid visibility {0:?}")] + InvalidVisibility(wgt::ShaderStages), +} + +//TODO: refactor this to move out `enum BindingError`. + +#[derive(Clone, Debug, Error)] +pub enum CreateBindGroupError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("bind group layout is invalid")] + InvalidLayout, + #[error("buffer {0:?} is invalid or destroyed")] + InvalidBuffer(BufferId), + #[error("texture view {0:?} is invalid")] + InvalidTextureView(TextureViewId), + #[error("texture {0:?} is invalid")] + InvalidTexture(TextureId), + #[error("sampler {0:?} is invalid")] + InvalidSampler(SamplerId), + #[error( + "binding count declared with at most {expected} items, but {actual} items were provided" + )] + BindingArrayPartialLengthMismatch { actual: usize, expected: usize }, + #[error( + "binding count declared with exactly {expected} items, but {actual} items were provided" + )] + BindingArrayLengthMismatch { actual: usize, expected: usize }, + #[error("array binding provided zero elements")] + BindingArrayZeroLength, + #[error("bound buffer range {range:?} does not fit in buffer of size {size}")] + BindingRangeTooLarge { + buffer: BufferId, + range: Range<wgt::BufferAddress>, + size: u64, + }, + #[error("buffer binding size {actual} is less than minimum {min}")] + BindingSizeTooSmall { + buffer: BufferId, + actual: u64, + min: u64, + }, + #[error("buffer binding size is zero")] + BindingZeroSize(BufferId), + #[error("number of bindings in bind group descriptor ({actual}) does not match the number of bindings defined in the bind group layout ({expected})")] + BindingsNumMismatch { actual: usize, expected: usize }, + #[error("binding {0} is used at least twice in the descriptor")] + DuplicateBinding(u32), + #[error("unable to find a corresponding declaration for the given binding {0}")] + MissingBindingDeclaration(u32), + #[error(transparent)] + MissingBufferUsage(#[from] 
MissingBufferUsageError), + #[error(transparent)] + MissingTextureUsage(#[from] MissingTextureUsageError), + #[error("binding declared as a single item, but bind group is using it as an array")] + SingleBindingExpected, + #[error("buffer offset {0} does not respect device's requested `{1}` limit {2}")] + UnalignedBufferOffset(wgt::BufferAddress, &'static str, u32), + #[error( + "buffer binding {binding} range {given} exceeds `max_*_buffer_binding_size` limit {limit}" + )] + BufferRangeTooLarge { + binding: u32, + given: u32, + limit: u32, + }, + #[error("binding {binding} has a different type ({actual:?}) than the one in the layout ({expected:?})")] + WrongBindingType { + // Index of the binding + binding: u32, + // The type given to the function + actual: wgt::BindingType, + // Human-readable description of expected types + expected: &'static str, + }, + #[error("texture binding {binding} expects multisampled = {layout_multisampled}, but given a view with samples = {view_samples}")] + InvalidTextureMultisample { + binding: u32, + layout_multisampled: bool, + view_samples: u32, + }, + #[error("texture binding {binding} expects sample type = {layout_sample_type:?}, but given a view with format = {view_format:?}")] + InvalidTextureSampleType { + binding: u32, + layout_sample_type: wgt::TextureSampleType, + view_format: wgt::TextureFormat, + }, + #[error("texture binding {binding} expects dimension = {layout_dimension:?}, but given a view with dimension = {view_dimension:?}")] + InvalidTextureDimension { + binding: u32, + layout_dimension: wgt::TextureViewDimension, + view_dimension: wgt::TextureViewDimension, + }, + #[error("storage texture binding {binding} expects format = {layout_format:?}, but given a view with format = {view_format:?}")] + InvalidStorageTextureFormat { + binding: u32, + layout_format: wgt::TextureFormat, + view_format: wgt::TextureFormat, + }, + #[error("storage texture bindings must have a single mip level, but given a view with mip_level_count 
= {mip_level_count:?} at binding {binding}")] + InvalidStorageTextureMipLevelCount { binding: u32, mip_level_count: u32 }, + #[error("sampler binding {binding} expects comparison = {layout_cmp}, but given a sampler with comparison = {sampler_cmp}")] + WrongSamplerComparison { + binding: u32, + layout_cmp: bool, + sampler_cmp: bool, + }, + #[error("sampler binding {binding} expects filtering = {layout_flt}, but given a sampler with filtering = {sampler_flt}")] + WrongSamplerFiltering { + binding: u32, + layout_flt: bool, + sampler_flt: bool, + }, + #[error("bound texture views can not have both depth and stencil aspects enabled")] + DepthStencilAspect, + #[error("the adapter does not support read access for storages texture of format {0:?}")] + StorageReadNotSupported(wgt::TextureFormat), + #[error(transparent)] + ResourceUsageConflict(#[from] UsageConflict), +} + +impl PrettyError for CreateBindGroupError { + fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { + fmt.error(self); + match *self { + Self::BindingZeroSize(id) => { + fmt.buffer_label(&id); + } + Self::BindingRangeTooLarge { buffer, .. } => { + fmt.buffer_label(&buffer); + } + Self::BindingSizeTooSmall { buffer, .. 
} => { + fmt.buffer_label(&buffer); + } + Self::InvalidBuffer(id) => { + fmt.buffer_label(&id); + } + Self::InvalidTextureView(id) => { + fmt.texture_view_label(&id); + } + Self::InvalidSampler(id) => { + fmt.sampler_label(&id); + } + _ => {} + }; + } +} + +#[derive(Clone, Debug, Error)] +pub enum BindingZone { + #[error("stage {0:?}")] + Stage(wgt::ShaderStages), + #[error("whole pipeline")] + Pipeline, +} + +#[derive(Clone, Debug, Error)] +#[error("too many bindings of type {kind:?} in {zone}, limit is {limit}, count was {count}")] +pub struct BindingTypeMaxCountError { + pub kind: BindingTypeMaxCountErrorKind, + pub zone: BindingZone, + pub limit: u32, + pub count: u32, +} + +#[derive(Clone, Debug)] +pub enum BindingTypeMaxCountErrorKind { + DynamicUniformBuffers, + DynamicStorageBuffers, + SampledTextures, + Samplers, + StorageBuffers, + StorageTextures, + UniformBuffers, +} + +#[derive(Debug, Default)] +pub(crate) struct PerStageBindingTypeCounter { + vertex: u32, + fragment: u32, + compute: u32, +} + +impl PerStageBindingTypeCounter { + pub(crate) fn add(&mut self, stage: wgt::ShaderStages, count: u32) { + if stage.contains(wgt::ShaderStages::VERTEX) { + self.vertex += count; + } + if stage.contains(wgt::ShaderStages::FRAGMENT) { + self.fragment += count; + } + if stage.contains(wgt::ShaderStages::COMPUTE) { + self.compute += count; + } + } + + pub(crate) fn max(&self) -> (BindingZone, u32) { + let max_value = self.vertex.max(self.fragment.max(self.compute)); + let mut stage = wgt::ShaderStages::NONE; + if max_value == self.vertex { + stage |= wgt::ShaderStages::VERTEX + } + if max_value == self.fragment { + stage |= wgt::ShaderStages::FRAGMENT + } + if max_value == self.compute { + stage |= wgt::ShaderStages::COMPUTE + } + (BindingZone::Stage(stage), max_value) + } + + pub(crate) fn merge(&mut self, other: &Self) { + self.vertex = self.vertex.max(other.vertex); + self.fragment = self.fragment.max(other.fragment); + self.compute = 
self.compute.max(other.compute); + } + + pub(crate) fn validate( + &self, + limit: u32, + kind: BindingTypeMaxCountErrorKind, + ) -> Result<(), BindingTypeMaxCountError> { + let (zone, count) = self.max(); + if limit < count { + Err(BindingTypeMaxCountError { + kind, + zone, + limit, + count, + }) + } else { + Ok(()) + } + } +} + +#[derive(Debug, Default)] +pub(crate) struct BindingTypeMaxCountValidator { + dynamic_uniform_buffers: u32, + dynamic_storage_buffers: u32, + sampled_textures: PerStageBindingTypeCounter, + samplers: PerStageBindingTypeCounter, + storage_buffers: PerStageBindingTypeCounter, + storage_textures: PerStageBindingTypeCounter, + uniform_buffers: PerStageBindingTypeCounter, +} + +impl BindingTypeMaxCountValidator { + pub(crate) fn add_binding(&mut self, binding: &wgt::BindGroupLayoutEntry) { + let count = binding.count.map_or(1, |count| count.get()); + match binding.ty { + wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Uniform, + has_dynamic_offset, + .. + } => { + self.uniform_buffers.add(binding.visibility, count); + if has_dynamic_offset { + self.dynamic_uniform_buffers += count; + } + } + wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Storage { .. }, + has_dynamic_offset, + .. + } => { + self.storage_buffers.add(binding.visibility, count); + if has_dynamic_offset { + self.dynamic_storage_buffers += count; + } + } + wgt::BindingType::Sampler { .. } => { + self.samplers.add(binding.visibility, count); + } + wgt::BindingType::Texture { .. } => { + self.sampled_textures.add(binding.visibility, count); + } + wgt::BindingType::StorageTexture { .. 
} => { + self.storage_textures.add(binding.visibility, count); + } + } + } + + pub(crate) fn merge(&mut self, other: &Self) { + self.dynamic_uniform_buffers += other.dynamic_uniform_buffers; + self.dynamic_storage_buffers += other.dynamic_storage_buffers; + self.sampled_textures.merge(&other.sampled_textures); + self.samplers.merge(&other.samplers); + self.storage_buffers.merge(&other.storage_buffers); + self.storage_textures.merge(&other.storage_textures); + self.uniform_buffers.merge(&other.uniform_buffers); + } + + pub(crate) fn validate(&self, limits: &wgt::Limits) -> Result<(), BindingTypeMaxCountError> { + if limits.max_dynamic_uniform_buffers_per_pipeline_layout < self.dynamic_uniform_buffers { + return Err(BindingTypeMaxCountError { + kind: BindingTypeMaxCountErrorKind::DynamicUniformBuffers, + zone: BindingZone::Pipeline, + limit: limits.max_dynamic_uniform_buffers_per_pipeline_layout, + count: self.dynamic_uniform_buffers, + }); + } + if limits.max_dynamic_storage_buffers_per_pipeline_layout < self.dynamic_storage_buffers { + return Err(BindingTypeMaxCountError { + kind: BindingTypeMaxCountErrorKind::DynamicStorageBuffers, + zone: BindingZone::Pipeline, + limit: limits.max_dynamic_storage_buffers_per_pipeline_layout, + count: self.dynamic_storage_buffers, + }); + } + self.sampled_textures.validate( + limits.max_sampled_textures_per_shader_stage, + BindingTypeMaxCountErrorKind::SampledTextures, + )?; + self.storage_buffers.validate( + limits.max_storage_buffers_per_shader_stage, + BindingTypeMaxCountErrorKind::StorageBuffers, + )?; + self.samplers.validate( + limits.max_samplers_per_shader_stage, + BindingTypeMaxCountErrorKind::Samplers, + )?; + self.storage_buffers.validate( + limits.max_storage_buffers_per_shader_stage, + BindingTypeMaxCountErrorKind::StorageBuffers, + )?; + self.storage_textures.validate( + limits.max_storage_textures_per_shader_stage, + BindingTypeMaxCountErrorKind::StorageTextures, + )?; + self.uniform_buffers.validate( + 
limits.max_uniform_buffers_per_shader_stage, + BindingTypeMaxCountErrorKind::UniformBuffers, + )?; + Ok(()) + } +} + +/// Bindable resource and the slot to bind it to. +#[derive(Clone, Debug)] +#[cfg_attr(feature = "trace", derive(Serialize))] +#[cfg_attr(feature = "replay", derive(Deserialize))] +pub struct BindGroupEntry<'a> { + /// Slot for which binding provides resource. Corresponds to an entry of the same + /// binding index in the [`BindGroupLayoutDescriptor`]. + pub binding: u32, + /// Resource to attach to the binding + pub resource: BindingResource<'a>, +} + +/// Describes a group of bindings and the resources to be bound. +#[derive(Clone, Debug)] +#[cfg_attr(feature = "trace", derive(Serialize))] +#[cfg_attr(feature = "replay", derive(Deserialize))] +pub struct BindGroupDescriptor<'a> { + /// Debug label of the bind group. + /// + /// This will show up in graphics debuggers for easy identification. + pub label: Label<'a>, + /// The [`BindGroupLayout`] that corresponds to this bind group. + pub layout: BindGroupLayoutId, + /// The resources to bind to this bind group. + pub entries: Cow<'a, [BindGroupEntry<'a>]>, +} + +/// Describes a [`BindGroupLayout`]. +#[derive(Clone, Debug)] +#[cfg_attr(feature = "trace", derive(serde::Serialize))] +#[cfg_attr(feature = "replay", derive(serde::Deserialize))] +pub struct BindGroupLayoutDescriptor<'a> { + /// Debug label of the bind group layout. + /// + /// This will show up in graphics debuggers for easy identification. + pub label: Label<'a>, + /// Array of entries in this BindGroupLayout + pub entries: Cow<'a, [wgt::BindGroupLayoutEntry]>, +} + +pub(crate) type BindEntryMap = FastHashMap<u32, wgt::BindGroupLayoutEntry>; + +/// Bind group layout. +/// +/// The lifetime of BGLs is a bit special. They are only referenced on CPU +/// without considering GPU operations. And on CPU they get manual +/// inc-refs and dec-refs. 
In particular, the following objects depend on them: +/// - produced bind groups +/// - produced pipeline layouts +/// - pipelines with implicit layouts +#[derive(Debug)] +pub struct BindGroupLayout<A: hal::Api> { + pub(crate) raw: A::BindGroupLayout, + pub(crate) device_id: Stored<DeviceId>, + pub(crate) multi_ref_count: MultiRefCount, + pub(crate) entries: BindEntryMap, + #[allow(unused)] + pub(crate) dynamic_count: usize, + pub(crate) count_validator: BindingTypeMaxCountValidator, + #[cfg(debug_assertions)] + pub(crate) label: String, +} + +impl<A: hal::Api> Resource for BindGroupLayout<A> { + const TYPE: &'static str = "BindGroupLayout"; + + fn life_guard(&self) -> &LifeGuard { + unreachable!() + } + + fn label(&self) -> &str { + #[cfg(debug_assertions)] + return &self.label; + #[cfg(not(debug_assertions))] + return ""; + } +} + +#[derive(Clone, Debug, Error)] +pub enum CreatePipelineLayoutError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("bind group layout {0:?} is invalid")] + InvalidBindGroupLayout(BindGroupLayoutId), + #[error( + "push constant at index {index} has range bound {bound} not aligned to {}", + wgt::PUSH_CONSTANT_ALIGNMENT + )] + MisalignedPushConstantRange { index: usize, bound: u32 }, + #[error(transparent)] + MissingFeatures(#[from] MissingFeatures), + #[error("push constant range (index {index}) provides for stage(s) {provided:?} but there exists another range that provides stage(s) {intersected:?}. 
Each stage may only be provided by one range")] + MoreThanOnePushConstantRangePerStage { + index: usize, + provided: wgt::ShaderStages, + intersected: wgt::ShaderStages, + }, + #[error("push constant at index {index} has range {}..{} which exceeds device push constant size limit 0..{max}", range.start, range.end)] + PushConstantRangeTooLarge { + index: usize, + range: Range<u32>, + max: u32, + }, + #[error(transparent)] + TooManyBindings(BindingTypeMaxCountError), + #[error("bind group layout count {actual} exceeds device bind group limit {max}")] + TooManyGroups { actual: usize, max: usize }, +} + +impl PrettyError for CreatePipelineLayoutError { + fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { + fmt.error(self); + if let Self::InvalidBindGroupLayout(id) = *self { + fmt.bind_group_layout_label(&id); + }; + } +} + +#[derive(Clone, Debug, Error)] +pub enum PushConstantUploadError { + #[error("provided push constant with indices {offset}..{end_offset} overruns matching push constant range at index {idx}, with stage(s) {:?} and indices {:?}", range.stages, range.range)] + TooLarge { + offset: u32, + end_offset: u32, + idx: usize, + range: wgt::PushConstantRange, + }, + #[error("provided push constant is for stage(s) {actual:?}, stage with a partial match found at index {idx} with stage(s) {matched:?}, however push constants must be complete matches")] + PartialRangeMatch { + actual: wgt::ShaderStages, + idx: usize, + matched: wgt::ShaderStages, + }, + #[error("provided push constant is for stage(s) {actual:?}, but intersects a push constant range (at index {idx}) with stage(s) {missing:?}. 
Push constants must provide the stages for all ranges they intersect")] + MissingStages { + actual: wgt::ShaderStages, + idx: usize, + missing: wgt::ShaderStages, + }, + #[error("provided push constant is for stage(s) {actual:?}, however the pipeline layout has no push constant range for the stage(s) {unmatched:?}")] + UnmatchedStages { + actual: wgt::ShaderStages, + unmatched: wgt::ShaderStages, + }, + #[error("provided push constant offset {0} does not respect `PUSH_CONSTANT_ALIGNMENT`")] + Unaligned(u32), +} + +/// Describes a pipeline layout. +/// +/// A `PipelineLayoutDescriptor` can be used to create a pipeline layout. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "trace", derive(Serialize))] +#[cfg_attr(feature = "replay", derive(Deserialize))] +pub struct PipelineLayoutDescriptor<'a> { + /// Debug label of the pipeine layout. + /// + /// This will show up in graphics debuggers for easy identification. + pub label: Label<'a>, + /// Bind groups that this pipeline uses. The first entry will provide all the bindings for + /// "set = 0", second entry will provide all the bindings for "set = 1" etc. + pub bind_group_layouts: Cow<'a, [BindGroupLayoutId]>, + /// Set of push constant ranges this pipeline uses. Each shader stage that + /// uses push constants must define the range in push constant memory that + /// corresponds to its single `layout(push_constant)` uniform block. + /// + /// If this array is non-empty, the + /// [`Features::PUSH_CONSTANTS`](wgt::Features::PUSH_CONSTANTS) feature must + /// be enabled. 
+ pub push_constant_ranges: Cow<'a, [wgt::PushConstantRange]>, +} + +#[derive(Debug)] +pub struct PipelineLayout<A: hal::Api> { + pub(crate) raw: A::PipelineLayout, + pub(crate) device_id: Stored<DeviceId>, + pub(crate) life_guard: LifeGuard, + pub(crate) bind_group_layout_ids: ArrayVec<Valid<BindGroupLayoutId>, { hal::MAX_BIND_GROUPS }>, + pub(crate) push_constant_ranges: ArrayVec<wgt::PushConstantRange, { SHADER_STAGE_COUNT }>, +} + +impl<A: hal::Api> PipelineLayout<A> { + /// Validate push constants match up with expected ranges. + pub(crate) fn validate_push_constant_ranges( + &self, + stages: wgt::ShaderStages, + offset: u32, + end_offset: u32, + ) -> Result<(), PushConstantUploadError> { + // Don't need to validate size against the push constant size limit here, + // as push constant ranges are already validated to be within bounds, + // and we validate that they are within the ranges. + + if offset % wgt::PUSH_CONSTANT_ALIGNMENT != 0 { + return Err(PushConstantUploadError::Unaligned(offset)); + } + + // Push constant validation looks very complicated on the surface, but + // the problem can be range-reduced pretty well. + // + // Push constants require (summarized from the vulkan spec): + // 1. For each byte in the range and for each shader stage in stageFlags, + // there must be a push constant range in the layout that includes that + // byte and that stage. + // 2. For each byte in the range and for each push constant range that overlaps that byte, + // `stage` must include all stages in that push constant range’s `stage`. + // + // However there are some additional constraints that help us: + // 3. All push constant ranges are the only range that can access that stage. + // i.e. 
if one range has VERTEX, no other range has VERTEX + // + // Therefore we can simplify the checks in the following ways: + // - Because 3 guarantees that the push constant range has a unique stage, + // when we check for 1, we can simply check that our entire updated range + // is within a push constant range. i.e. our range for a specific stage cannot + // intersect more than one push constant range. + let mut used_stages = wgt::ShaderStages::NONE; + for (idx, range) in self.push_constant_ranges.iter().enumerate() { + // contains not intersects due to 2 + if stages.contains(range.stages) { + if !(range.range.start <= offset && end_offset <= range.range.end) { + return Err(PushConstantUploadError::TooLarge { + offset, + end_offset, + idx, + range: range.clone(), + }); + } + used_stages |= range.stages; + } else if stages.intersects(range.stages) { + // Will be caught by used stages check below, but we can do this because of 1 + // and is more helpful to the user. + return Err(PushConstantUploadError::PartialRangeMatch { + actual: stages, + idx, + matched: range.stages, + }); + } + + // The push constant range intersects range we are uploading + if offset < range.range.end && range.range.start < end_offset { + // But requires stages we don't provide + if !stages.contains(range.stages) { + return Err(PushConstantUploadError::MissingStages { + actual: stages, + idx, + missing: stages, + }); + } + } + } + if used_stages != stages { + return Err(PushConstantUploadError::UnmatchedStages { + actual: stages, + unmatched: stages - used_stages, + }); + } + Ok(()) + } +} + +impl<A: hal::Api> Resource for PipelineLayout<A> { + const TYPE: &'static str = "PipelineLayout"; + + fn life_guard(&self) -> &LifeGuard { + &self.life_guard + } +} + +#[repr(C)] +#[derive(Clone, Debug, Hash, Eq, PartialEq)] +#[cfg_attr(feature = "trace", derive(Serialize))] +#[cfg_attr(feature = "replay", derive(Deserialize))] +pub struct BufferBinding { + pub buffer_id: BufferId, + pub offset: 
wgt::BufferAddress, + pub size: Option<wgt::BufferSize>, +} + +// Note: Duplicated in `wgpu-rs` as `BindingResource` +// They're different enough that it doesn't make sense to share a common type +#[derive(Debug, Clone)] +#[cfg_attr(feature = "trace", derive(serde::Serialize))] +#[cfg_attr(feature = "replay", derive(serde::Deserialize))] +pub enum BindingResource<'a> { + Buffer(BufferBinding), + BufferArray(Cow<'a, [BufferBinding]>), + Sampler(SamplerId), + SamplerArray(Cow<'a, [SamplerId]>), + TextureView(TextureViewId), + TextureViewArray(Cow<'a, [TextureViewId]>), +} + +#[derive(Clone, Debug, Error)] +pub enum BindError { + #[error( + "Bind group {group} expects {expected} dynamic offset{s0}. However {actual} dynamic offset{s1} were provided.", + s0 = if *.expected >= 2 { "s" } else { "" }, + s1 = if *.actual >= 2 { "s" } else { "" }, + )] + MismatchedDynamicOffsetCount { + group: u8, + actual: usize, + expected: usize, + }, + #[error( + "Dynamic binding index {idx} (targeting bind group {group}, binding {binding}) with value {offset}, does not respect device's requested `{limit_name}` limit: {alignment}" + )] + UnalignedDynamicBinding { + idx: usize, + group: u8, + binding: u32, + offset: u32, + alignment: u32, + limit_name: &'static str, + }, + #[error( + "Dynamic binding offset index {idx} with offset {offset} would overrun the buffer bound to bind group {group} -> binding {binding}. \ + Buffer size is {buffer_size} bytes, the binding binds bytes {binding_range:?}, meaning the maximum the binding can be offset is {maximum_dynamic_offset} bytes", + )] + DynamicBindingOutOfBounds { + idx: usize, + group: u8, + binding: u32, + offset: u32, + buffer_size: wgt::BufferAddress, + binding_range: Range<wgt::BufferAddress>, + maximum_dynamic_offset: wgt::BufferAddress, + }, +} + +#[derive(Debug)] +pub struct BindGroupDynamicBindingData { + /// The index of the binding. + /// + /// Used for more descriptive errors. 
+ pub(crate) binding_idx: u32, + /// The size of the buffer. + /// + /// Used for more descriptive errors. + pub(crate) buffer_size: wgt::BufferAddress, + /// The range that the binding covers. + /// + /// Used for more descriptive errors. + pub(crate) binding_range: Range<wgt::BufferAddress>, + /// The maximum value the dynamic offset can have before running off the end of the buffer. + pub(crate) maximum_dynamic_offset: wgt::BufferAddress, + /// The binding type. + pub(crate) binding_type: wgt::BufferBindingType, +} + +pub(crate) fn buffer_binding_type_alignment( + limits: &wgt::Limits, + binding_type: wgt::BufferBindingType, +) -> (u32, &'static str) { + match binding_type { + wgt::BufferBindingType::Uniform => ( + limits.min_uniform_buffer_offset_alignment, + "min_uniform_buffer_offset_alignment", + ), + wgt::BufferBindingType::Storage { .. } => ( + limits.min_storage_buffer_offset_alignment, + "min_storage_buffer_offset_alignment", + ), + } +} + +pub struct BindGroup<A: HalApi> { + pub(crate) raw: A::BindGroup, + pub(crate) device_id: Stored<DeviceId>, + pub(crate) layout_id: Valid<BindGroupLayoutId>, + pub(crate) life_guard: LifeGuard, + pub(crate) used: BindGroupStates<A>, + pub(crate) used_buffer_ranges: Vec<BufferInitTrackerAction>, + pub(crate) used_texture_ranges: Vec<TextureInitTrackerAction>, + pub(crate) dynamic_binding_info: Vec<BindGroupDynamicBindingData>, + /// Actual binding sizes for buffers that don't have `min_binding_size` + /// specified in BGL. Listed in the order of iteration of `BGL.entries`. 
+ pub(crate) late_buffer_binding_sizes: Vec<wgt::BufferSize>, +} + +impl<A: HalApi> BindGroup<A> { + pub(crate) fn validate_dynamic_bindings( + &self, + bind_group_index: u8, + offsets: &[wgt::DynamicOffset], + limits: &wgt::Limits, + ) -> Result<(), BindError> { + if self.dynamic_binding_info.len() != offsets.len() { + return Err(BindError::MismatchedDynamicOffsetCount { + group: bind_group_index, + expected: self.dynamic_binding_info.len(), + actual: offsets.len(), + }); + } + + for (idx, (info, &offset)) in self + .dynamic_binding_info + .iter() + .zip(offsets.iter()) + .enumerate() + { + let (alignment, limit_name) = buffer_binding_type_alignment(limits, info.binding_type); + if offset as wgt::BufferAddress % alignment as u64 != 0 { + return Err(BindError::UnalignedDynamicBinding { + group: bind_group_index, + binding: info.binding_idx, + idx, + offset, + alignment, + limit_name, + }); + } + + if offset as wgt::BufferAddress > info.maximum_dynamic_offset { + return Err(BindError::DynamicBindingOutOfBounds { + group: bind_group_index, + binding: info.binding_idx, + idx, + offset, + buffer_size: info.buffer_size, + binding_range: info.binding_range.clone(), + maximum_dynamic_offset: info.maximum_dynamic_offset, + }); + } + } + + Ok(()) + } +} + +impl<A: HalApi> Resource for BindGroup<A> { + const TYPE: &'static str = "BindGroup"; + + fn life_guard(&self) -> &LifeGuard { + &self.life_guard + } +} + +#[derive(Clone, Debug, Error)] +pub enum GetBindGroupLayoutError { + #[error("pipeline is invalid")] + InvalidPipeline, + #[error("invalid group index {0}")] + InvalidGroupIndex(u32), +} + +#[derive(Clone, Debug, Error, Eq, PartialEq)] +#[error("Buffer is bound with size {bound_size} where the shader expects {shader_size} in group[{group_index}] compact index {compact_index}")] +pub struct LateMinBufferBindingSizeMismatch { + pub group_index: u32, + pub compact_index: usize, + pub shader_size: wgt::BufferAddress, + pub bound_size: wgt::BufferAddress, +} diff --git 
a/third_party/rust/wgpu-core/src/command/bind.rs b/third_party/rust/wgpu-core/src/command/bind.rs new file mode 100644 index 0000000000..fdcb60c52d --- /dev/null +++ b/third_party/rust/wgpu-core/src/command/bind.rs @@ -0,0 +1,355 @@ +use crate::{ + binding_model::{BindGroup, LateMinBufferBindingSizeMismatch, PipelineLayout}, + device::SHADER_STAGE_COUNT, + hub::{HalApi, Storage}, + id::{BindGroupId, BindGroupLayoutId, PipelineLayoutId, Valid}, + pipeline::LateSizedBufferGroup, + Stored, +}; + +use arrayvec::ArrayVec; + +type BindGroupMask = u8; + +mod compat { + use std::ops::Range; + + #[derive(Debug)] + struct Entry<T> { + assigned: Option<T>, + expected: Option<T>, + } + impl<T> Default for Entry<T> { + fn default() -> Self { + Self { + assigned: None, + expected: None, + } + } + } + impl<T: Copy + PartialEq> Entry<T> { + fn is_active(&self) -> bool { + self.assigned.is_some() && self.expected.is_some() + } + + fn is_valid(&self) -> bool { + self.expected.is_none() || self.expected == self.assigned + } + } + + #[derive(Debug)] + pub struct Manager<T> { + entries: [Entry<T>; hal::MAX_BIND_GROUPS], + } + + impl<T: Copy + PartialEq> Manager<T> { + pub fn new() -> Self { + Self { + entries: Default::default(), + } + } + + fn make_range(&self, start_index: usize) -> Range<usize> { + // find first incompatible entry + let end = self + .entries + .iter() + .position(|e| e.expected.is_none() || e.assigned != e.expected) + .unwrap_or(self.entries.len()); + start_index..end.max(start_index) + } + + pub fn update_expectations(&mut self, expectations: &[T]) -> Range<usize> { + let start_index = self + .entries + .iter() + .zip(expectations) + .position(|(e, &expect)| e.expected != Some(expect)) + .unwrap_or(expectations.len()); + for (e, &expect) in self.entries[start_index..] 
+ .iter_mut() + .zip(expectations[start_index..].iter()) + { + e.expected = Some(expect); + } + for e in self.entries[expectations.len()..].iter_mut() { + e.expected = None; + } + self.make_range(start_index) + } + + pub fn assign(&mut self, index: usize, value: T) -> Range<usize> { + self.entries[index].assigned = Some(value); + self.make_range(index) + } + + pub fn list_active(&self) -> impl Iterator<Item = usize> + '_ { + self.entries + .iter() + .enumerate() + .filter_map(|(i, e)| if e.is_active() { Some(i) } else { None }) + } + + pub fn invalid_mask(&self) -> super::BindGroupMask { + self.entries.iter().enumerate().fold(0, |mask, (i, entry)| { + if entry.is_valid() { + mask + } else { + mask | 1u8 << i + } + }) + } + } + + #[test] + fn test_compatibility() { + let mut man = Manager::<i32>::new(); + man.entries[0] = Entry { + expected: Some(3), + assigned: Some(2), + }; + man.entries[1] = Entry { + expected: Some(1), + assigned: Some(1), + }; + man.entries[2] = Entry { + expected: Some(4), + assigned: Some(5), + }; + // check that we rebind [1] after [0] became compatible + assert_eq!(man.assign(0, 3), 0..2); + // check that nothing is rebound + assert_eq!(man.update_expectations(&[3, 2]), 1..1); + // check that [1] and [2] are rebound on expectations change + assert_eq!(man.update_expectations(&[3, 1, 5]), 1..3); + // reset the first two bindings + assert_eq!(man.update_expectations(&[4, 6, 5]), 0..0); + // check that nothing is rebound, even if there is a match, + // since earlier binding is incompatible. 
+ assert_eq!(man.assign(1, 6), 1..1); + // finally, bind everything + assert_eq!(man.assign(0, 4), 0..3); + } +} + +#[derive(Debug)] +struct LateBufferBinding { + shader_expect_size: wgt::BufferAddress, + bound_size: wgt::BufferAddress, +} + +#[derive(Debug, Default)] +pub(super) struct EntryPayload { + pub(super) group_id: Option<Stored<BindGroupId>>, + pub(super) dynamic_offsets: Vec<wgt::DynamicOffset>, + late_buffer_bindings: Vec<LateBufferBinding>, + /// Since `LateBufferBinding` may contain information about the bindings + /// not used by the pipeline, we need to know when to stop validating. + pub(super) late_bindings_effective_count: usize, +} + +impl EntryPayload { + fn reset(&mut self) { + self.group_id = None; + self.dynamic_offsets.clear(); + self.late_buffer_bindings.clear(); + self.late_bindings_effective_count = 0; + } +} + +#[derive(Debug)] +pub(super) struct Binder { + pub(super) pipeline_layout_id: Option<Valid<PipelineLayoutId>>, //TODO: strongly `Stored` + manager: compat::Manager<Valid<BindGroupLayoutId>>, + payloads: [EntryPayload; hal::MAX_BIND_GROUPS], +} + +impl Binder { + pub(super) fn new() -> Self { + Self { + pipeline_layout_id: None, + manager: compat::Manager::new(), + payloads: Default::default(), + } + } + + pub(super) fn reset(&mut self) { + self.pipeline_layout_id = None; + self.manager = compat::Manager::new(); + for payload in self.payloads.iter_mut() { + payload.reset(); + } + } + + pub(super) fn change_pipeline_layout<'a, A: HalApi>( + &'a mut self, + guard: &Storage<PipelineLayout<A>, PipelineLayoutId>, + new_id: Valid<PipelineLayoutId>, + late_sized_buffer_groups: &[LateSizedBufferGroup], + ) -> (usize, &'a [EntryPayload]) { + let old_id_opt = self.pipeline_layout_id.replace(new_id); + let new = &guard[new_id]; + + let mut bind_range = self.manager.update_expectations(&new.bind_group_layout_ids); + + // Update the buffer binding sizes that are required by shaders. 
+ for (payload, late_group) in self.payloads.iter_mut().zip(late_sized_buffer_groups) { + payload.late_bindings_effective_count = late_group.shader_sizes.len(); + for (late_binding, &shader_expect_size) in payload + .late_buffer_bindings + .iter_mut() + .zip(late_group.shader_sizes.iter()) + { + late_binding.shader_expect_size = shader_expect_size; + } + if late_group.shader_sizes.len() > payload.late_buffer_bindings.len() { + for &shader_expect_size in + late_group.shader_sizes[payload.late_buffer_bindings.len()..].iter() + { + payload.late_buffer_bindings.push(LateBufferBinding { + shader_expect_size, + bound_size: 0, + }); + } + } + } + + if let Some(old_id) = old_id_opt { + let old = &guard[old_id]; + // root constants are the base compatibility property + if old.push_constant_ranges != new.push_constant_ranges { + bind_range.start = 0; + } + } + + (bind_range.start, &self.payloads[bind_range]) + } + + pub(super) fn assign_group<'a, A: HalApi>( + &'a mut self, + index: usize, + bind_group_id: Valid<BindGroupId>, + bind_group: &BindGroup<A>, + offsets: &[wgt::DynamicOffset], + ) -> &'a [EntryPayload] { + log::trace!("\tBinding [{}] = group {:?}", index, bind_group_id); + debug_assert_eq!(A::VARIANT, bind_group_id.0.backend()); + + let payload = &mut self.payloads[index]; + payload.group_id = Some(Stored { + value: bind_group_id, + ref_count: bind_group.life_guard.add_ref(), + }); + payload.dynamic_offsets.clear(); + payload.dynamic_offsets.extend_from_slice(offsets); + + // Fill out the actual binding sizes for buffers, + // whose layout doesn't specify `min_binding_size`. 
+ for (late_binding, late_size) in payload + .late_buffer_bindings + .iter_mut() + .zip(bind_group.late_buffer_binding_sizes.iter()) + { + late_binding.bound_size = late_size.get(); + } + if bind_group.late_buffer_binding_sizes.len() > payload.late_buffer_bindings.len() { + for late_size in + bind_group.late_buffer_binding_sizes[payload.late_buffer_bindings.len()..].iter() + { + payload.late_buffer_bindings.push(LateBufferBinding { + shader_expect_size: 0, + bound_size: late_size.get(), + }); + } + } + + let bind_range = self.manager.assign(index, bind_group.layout_id); + &self.payloads[bind_range] + } + + pub(super) fn list_active(&self) -> impl Iterator<Item = Valid<BindGroupId>> + '_ { + let payloads = &self.payloads; + self.manager + .list_active() + .map(move |index| payloads[index].group_id.as_ref().unwrap().value) + } + + pub(super) fn invalid_mask(&self) -> BindGroupMask { + self.manager.invalid_mask() + } + + /// Scan active buffer bindings corresponding to layouts without `min_binding_size` specified. + pub(super) fn check_late_buffer_bindings( + &self, + ) -> Result<(), LateMinBufferBindingSizeMismatch> { + for group_index in self.manager.list_active() { + let payload = &self.payloads[group_index]; + for (compact_index, late_binding) in payload.late_buffer_bindings + [..payload.late_bindings_effective_count] + .iter() + .enumerate() + { + if late_binding.bound_size < late_binding.shader_expect_size { + return Err(LateMinBufferBindingSizeMismatch { + group_index: group_index as u32, + compact_index, + shader_size: late_binding.shader_expect_size, + bound_size: late_binding.bound_size, + }); + } + } + } + Ok(()) + } +} + +struct PushConstantChange { + stages: wgt::ShaderStages, + offset: u32, + enable: bool, +} + +/// Break up possibly overlapping push constant ranges into a set of +/// non-overlapping ranges which contain all the stage flags of the +/// original ranges. This allows us to zero out (or write any value) +/// to every possible value. 
+pub fn compute_nonoverlapping_ranges( + ranges: &[wgt::PushConstantRange], +) -> ArrayVec<wgt::PushConstantRange, { SHADER_STAGE_COUNT * 2 }> { + if ranges.is_empty() { + return ArrayVec::new(); + } + debug_assert!(ranges.len() <= SHADER_STAGE_COUNT); + + let mut breaks: ArrayVec<PushConstantChange, { SHADER_STAGE_COUNT * 2 }> = ArrayVec::new(); + for range in ranges { + breaks.push(PushConstantChange { + stages: range.stages, + offset: range.range.start, + enable: true, + }); + breaks.push(PushConstantChange { + stages: range.stages, + offset: range.range.end, + enable: false, + }); + } + breaks.sort_unstable_by_key(|change| change.offset); + + let mut output_ranges = ArrayVec::new(); + let mut position = 0_u32; + let mut stages = wgt::ShaderStages::NONE; + + for bk in breaks { + if bk.offset - position > 0 && !stages.is_empty() { + output_ranges.push(wgt::PushConstantRange { + stages, + range: position..bk.offset, + }) + } + position = bk.offset; + stages.set(bk.stages, bk.enable); + } + + output_ranges +} diff --git a/third_party/rust/wgpu-core/src/command/bundle.rs b/third_party/rust/wgpu-core/src/command/bundle.rs new file mode 100644 index 0000000000..37560ec885 --- /dev/null +++ b/third_party/rust/wgpu-core/src/command/bundle.rs @@ -0,0 +1,1644 @@ +/*! Render Bundles + +A render bundle is a prerecorded sequence of commands that can be replayed on a +command encoder with a single call. A single bundle can replayed any number of +times, on different encoders. Constructing a render bundle lets `wgpu` validate +and analyze its commands up front, so that replaying a bundle can be more +efficient than simply re-recording its commands each time. + +Not all commands are available in bundles; for example, a render bundle may not +contain a [`RenderCommand::SetViewport`] command. + +Most of `wgpu`'s backend graphics APIs have something like bundles. For example, +Vulkan calls them "secondary command buffers", and Metal calls them "indirect +command buffers". 
Although we plan to take advantage of these platform features +at some point in the future, for now `wgpu`'s implementation of render bundles +does not use them: at the hal level, `wgpu` render bundles just replay the +commands. + +## Render Bundle Isolation + +One important property of render bundles is that the draw calls in a render +bundle depend solely on the pipeline and state established within the render +bundle itself. A draw call in a bundle will never use a vertex buffer, say, that +was set in the `RenderPass` before executing the bundle. We call this property +'isolation', in that a render bundle is somewhat isolated from the passes that +use it. + +Render passes are also isolated from the effects of bundles. After executing a +render bundle, a render pass's pipeline, bind groups, and vertex and index +buffers are are unset, so the bundle cannot affect later draw calls in the pass. + +A render pass is not fully isolated from a bundle's effects on push constant +values. Draw calls following a bundle's execution will see whatever values the +bundle writes to push constant storage. Setting a pipeline initializes any push +constant storage it could access to zero, and this initialization may also be +visible after bundle execution. + +## Render Bundle Lifecycle + +To create a render bundle: + +1) Create a [`RenderBundleEncoder`] by calling + [`Global::device_create_render_bundle_encoder`][Gdcrbe]. + +2) Record commands in the `RenderBundleEncoder` using functions from the + [`bundle_ffi`] module. + +3) Call [`Global::render_bundle_encoder_finish`][Grbef], which analyzes and cleans up + the command stream and returns a `RenderBundleId`. + +4) Then, any number of times, call [`wgpu_render_pass_execute_bundles`][wrpeb] to + execute the bundle as part of some render pass. + +## Implementation + +The most complex part of render bundles is the "finish" step, mostly implemented +in [`RenderBundleEncoder::finish`]. 
This consumes the commands stored in the +encoder's [`BasePass`], while validating everything, tracking the state, +dropping redundant or unnecessary commands, and presenting the results as a new +[`RenderBundle`]. It doesn't actually execute any commands. + +This step also enforces the 'isolation' property mentioned above: every draw +call is checked to ensure that the resources it uses on were established since +the last time the pipeline was set. This means the bundle can be executed +verbatim without any state tracking. + +### Execution + +When the bundle is used in an actual render pass, `RenderBundle::execute` is +called. It goes through the commands and issues them into the native command +buffer. Thanks to isolation, it doesn't track any bind group invalidations or +index format changes. + +[Gdcrbe]: crate::hub::Global::device_create_render_bundle_encoder +[Grbef]: crate::hub::Global::render_bundle_encoder_finish +[wrpeb]: crate::command::render_ffi::wgpu_render_pass_execute_bundles +!*/ + +#![allow(clippy::reversed_empty_ranges)] + +use crate::{ + binding_model::{self, buffer_binding_type_alignment}, + command::{ + BasePass, BindGroupStateChange, DrawError, MapPassErr, PassErrorScope, RenderCommand, + RenderCommandError, StateChange, + }, + conv, + device::{ + AttachmentData, Device, DeviceError, MissingDownlevelFlags, RenderPassContext, + SHADER_STAGE_COUNT, + }, + error::{ErrorFormatter, PrettyError}, + hub::{GlobalIdentityHandlerFactory, HalApi, Hub, Resource, Storage, Token}, + id, + init_tracker::{BufferInitTrackerAction, MemoryInitKind, TextureInitTrackerAction}, + pipeline::{self, PipelineFlags}, + resource, + track::RenderBundleScope, + validation::check_buffer_usage, + Label, LabelHelpers, LifeGuard, Stored, +}; +use arrayvec::ArrayVec; +use std::{borrow::Cow, mem, num::NonZeroU32, ops::Range}; +use thiserror::Error; + +use hal::CommandEncoder as _; + +/// Describes a [`RenderBundleEncoder`]. 
+#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "trace", derive(serde::Serialize))] +#[cfg_attr(feature = "replay", derive(serde::Deserialize))] +pub struct RenderBundleEncoderDescriptor<'a> { + /// Debug label of the render bundle encoder. + /// + /// This will show up in graphics debuggers for easy identification. + pub label: Label<'a>, + /// The formats of the color attachments that this render bundle is capable + /// to rendering to. + /// + /// This must match the formats of the color attachments in the + /// renderpass this render bundle is executed in. + pub color_formats: Cow<'a, [Option<wgt::TextureFormat>]>, + /// Information about the depth attachment that this render bundle is + /// capable to rendering to. + /// + /// The format must match the format of the depth attachments in the + /// renderpass this render bundle is executed in. + pub depth_stencil: Option<wgt::RenderBundleDepthStencil>, + /// Sample count this render bundle is capable of rendering to. + /// + /// This must match the pipelines and the renderpasses it is used in. + pub sample_count: u32, + /// If this render bundle will rendering to multiple array layers in the + /// attachments at the same time. + pub multiview: Option<NonZeroU32>, +} + +#[derive(Debug)] +#[cfg_attr(feature = "serial-pass", derive(serde::Deserialize, serde::Serialize))] +pub struct RenderBundleEncoder { + base: BasePass<RenderCommand>, + parent_id: id::DeviceId, + pub(crate) context: RenderPassContext, + pub(crate) is_depth_read_only: bool, + pub(crate) is_stencil_read_only: bool, + + // Resource binding dedupe state. 
+ #[cfg_attr(feature = "serial-pass", serde(skip))] + current_bind_groups: BindGroupStateChange, + #[cfg_attr(feature = "serial-pass", serde(skip))] + current_pipeline: StateChange<id::RenderPipelineId>, +} + +impl RenderBundleEncoder { + pub fn new( + desc: &RenderBundleEncoderDescriptor, + parent_id: id::DeviceId, + base: Option<BasePass<RenderCommand>>, + ) -> Result<Self, CreateRenderBundleError> { + let (is_depth_read_only, is_stencil_read_only) = match desc.depth_stencil { + Some(ds) => { + let aspects = hal::FormatAspects::from(ds.format); + ( + !aspects.contains(hal::FormatAspects::DEPTH) || ds.depth_read_only, + !aspects.contains(hal::FormatAspects::STENCIL) || ds.stencil_read_only, + ) + } + // There's no depth/stencil attachment, so these values just don't + // matter. Choose the most accommodating value, to simplify + // validation. + None => (true, true), + }; + + //TODO: validate that attachment formats are renderable, + // have expected aspects, support multisampling. + Ok(Self { + base: base.unwrap_or_else(|| BasePass::new(&desc.label)), + parent_id, + context: RenderPassContext { + attachments: AttachmentData { + colors: if desc.color_formats.len() > hal::MAX_COLOR_ATTACHMENTS { + return Err(CreateRenderBundleError::TooManyColorAttachments); + } else { + desc.color_formats.iter().cloned().collect() + }, + resolves: ArrayVec::new(), + depth_stencil: desc.depth_stencil.map(|ds| ds.format), + }, + sample_count: { + let sc = desc.sample_count; + if sc == 0 || sc > 32 || !conv::is_power_of_two_u32(sc) { + return Err(CreateRenderBundleError::InvalidSampleCount(sc)); + } + sc + }, + multiview: desc.multiview, + }, + + is_depth_read_only, + is_stencil_read_only, + current_bind_groups: BindGroupStateChange::new(), + current_pipeline: StateChange::new(), + }) + } + + pub fn dummy(parent_id: id::DeviceId) -> Self { + Self { + base: BasePass::new(&None), + parent_id, + context: RenderPassContext { + attachments: AttachmentData { + colors: ArrayVec::new(), + 
resolves: ArrayVec::new(), + depth_stencil: None, + }, + sample_count: 0, + multiview: None, + }, + is_depth_read_only: false, + is_stencil_read_only: false, + + current_bind_groups: BindGroupStateChange::new(), + current_pipeline: StateChange::new(), + } + } + + #[cfg(feature = "trace")] + pub(crate) fn to_base_pass(&self) -> BasePass<RenderCommand> { + BasePass::from_ref(self.base.as_ref()) + } + + pub fn parent(&self) -> id::DeviceId { + self.parent_id + } + + /// Convert this encoder's commands into a [`RenderBundle`]. + /// + /// We want executing a [`RenderBundle`] to be quick, so we take + /// this opportunity to clean up the [`RenderBundleEncoder`]'s + /// command stream and gather metadata about it that will help + /// keep [`ExecuteBundle`] simple and fast. We remove redundant + /// commands (along with their side data), note resource usage, + /// and accumulate buffer and texture initialization actions. + /// + /// [`ExecuteBundle`]: RenderCommand::ExecuteBundle + pub(crate) fn finish<A: HalApi, G: GlobalIdentityHandlerFactory>( + self, + desc: &RenderBundleDescriptor, + device: &Device<A>, + hub: &Hub<A, G>, + token: &mut Token<Device<A>>, + ) -> Result<RenderBundle<A>, RenderBundleError> { + let (pipeline_layout_guard, mut token) = hub.pipeline_layouts.read(token); + let (bind_group_guard, mut token) = hub.bind_groups.read(&mut token); + let (pipeline_guard, mut token) = hub.render_pipelines.read(&mut token); + let (query_set_guard, mut token) = hub.query_sets.read(&mut token); + let (buffer_guard, mut token) = hub.buffers.read(&mut token); + let (texture_guard, _) = hub.textures.read(&mut token); + + let mut state = State { + trackers: RenderBundleScope::new( + &*buffer_guard, + &*texture_guard, + &*bind_group_guard, + &*pipeline_guard, + &*query_set_guard, + ), + pipeline: None, + bind: (0..hal::MAX_BIND_GROUPS).map(|_| None).collect(), + vertex: (0..hal::MAX_VERTEX_BUFFERS).map(|_| None).collect(), + index: None, + flat_dynamic_offsets: Vec::new(), 
+ }; + let mut commands = Vec::new(); + let mut buffer_memory_init_actions = Vec::new(); + let mut texture_memory_init_actions = Vec::new(); + + let base = self.base.as_ref(); + let mut next_dynamic_offset = 0; + + for &command in base.commands { + match command { + RenderCommand::SetBindGroup { + index, + num_dynamic_offsets, + bind_group_id, + } => { + let scope = PassErrorScope::SetBindGroup(bind_group_id); + + let bind_group: &binding_model::BindGroup<A> = state + .trackers + .bind_groups + .add_single(&*bind_group_guard, bind_group_id) + .ok_or(RenderCommandError::InvalidBindGroup(bind_group_id)) + .map_pass_err(scope)?; + self.check_valid_to_use(bind_group.device_id.value) + .map_pass_err(scope)?; + + let max_bind_groups = device.limits.max_bind_groups; + if (index as u32) >= max_bind_groups { + return Err(RenderCommandError::BindGroupIndexOutOfRange { + index, + max: max_bind_groups, + }) + .map_pass_err(scope); + } + + // Identify the next `num_dynamic_offsets` entries from `base.dynamic_offsets`. + let num_dynamic_offsets = num_dynamic_offsets as usize; + let offsets_range = + next_dynamic_offset..next_dynamic_offset + num_dynamic_offsets; + next_dynamic_offset = offsets_range.end; + let offsets = &base.dynamic_offsets[offsets_range.clone()]; + + if bind_group.dynamic_binding_info.len() != offsets.len() { + return Err(RenderCommandError::InvalidDynamicOffsetCount { + actual: offsets.len(), + expected: bind_group.dynamic_binding_info.len(), + }) + .map_pass_err(scope); + } + + // Check for misaligned offsets. 
+ for (offset, info) in offsets + .iter() + .map(|offset| *offset as wgt::BufferAddress) + .zip(bind_group.dynamic_binding_info.iter()) + { + let (alignment, limit_name) = + buffer_binding_type_alignment(&device.limits, info.binding_type); + if offset % alignment as u64 != 0 { + return Err(RenderCommandError::UnalignedBufferOffset( + offset, limit_name, alignment, + )) + .map_pass_err(scope); + } + } + + buffer_memory_init_actions.extend_from_slice(&bind_group.used_buffer_ranges); + texture_memory_init_actions.extend_from_slice(&bind_group.used_texture_ranges); + + state.set_bind_group(index, bind_group_id, bind_group.layout_id, offsets_range); + unsafe { + state + .trackers + .merge_bind_group(&*texture_guard, &bind_group.used) + .map_pass_err(scope)? + }; + //Note: stateless trackers are not merged: the lifetime reference + // is held to the bind group itself. + } + RenderCommand::SetPipeline(pipeline_id) => { + let scope = PassErrorScope::SetPipelineRender(pipeline_id); + + let pipeline: &pipeline::RenderPipeline<A> = state + .trackers + .render_pipelines + .add_single(&*pipeline_guard, pipeline_id) + .ok_or(RenderCommandError::InvalidPipeline(pipeline_id)) + .map_pass_err(scope)?; + self.check_valid_to_use(pipeline.device_id.value) + .map_pass_err(scope)?; + + self.context + .check_compatible(&pipeline.pass_context) + .map_err(RenderCommandError::IncompatiblePipelineTargets) + .map_pass_err(scope)?; + + if (pipeline.flags.contains(PipelineFlags::WRITES_DEPTH) + && self.is_depth_read_only) + || (pipeline.flags.contains(PipelineFlags::WRITES_STENCIL) + && self.is_stencil_read_only) + { + return Err(RenderCommandError::IncompatiblePipelineRods) + .map_pass_err(scope); + } + + let layout = &pipeline_layout_guard[pipeline.layout_id.value]; + let pipeline_state = PipelineState::new(pipeline_id, pipeline, layout); + + commands.push(command); + + // If this pipeline uses push constants, zero out their values. 
+ if let Some(iter) = pipeline_state.zero_push_constants() { + commands.extend(iter) + } + + state.invalidate_bind_groups(&pipeline_state, layout); + state.pipeline = Some(pipeline_state); + } + RenderCommand::SetIndexBuffer { + buffer_id, + index_format, + offset, + size, + } => { + let scope = PassErrorScope::SetIndexBuffer(buffer_id); + let buffer: &resource::Buffer<A> = state + .trackers + .buffers + .merge_single(&*buffer_guard, buffer_id, hal::BufferUses::INDEX) + .map_pass_err(scope)?; + self.check_valid_to_use(buffer.device_id.value) + .map_pass_err(scope)?; + check_buffer_usage(buffer.usage, wgt::BufferUsages::INDEX) + .map_pass_err(scope)?; + + let end = match size { + Some(s) => offset + s.get(), + None => buffer.size, + }; + buffer_memory_init_actions.extend(buffer.initialization_status.create_action( + buffer_id, + offset..end, + MemoryInitKind::NeedsInitializedMemory, + )); + state.set_index_buffer(buffer_id, index_format, offset..end); + } + RenderCommand::SetVertexBuffer { + slot, + buffer_id, + offset, + size, + } => { + let scope = PassErrorScope::SetVertexBuffer(buffer_id); + let buffer: &resource::Buffer<A> = state + .trackers + .buffers + .merge_single(&*buffer_guard, buffer_id, hal::BufferUses::VERTEX) + .map_pass_err(scope)?; + self.check_valid_to_use(buffer.device_id.value) + .map_pass_err(scope)?; + check_buffer_usage(buffer.usage, wgt::BufferUsages::VERTEX) + .map_pass_err(scope)?; + + let end = match size { + Some(s) => offset + s.get(), + None => buffer.size, + }; + buffer_memory_init_actions.extend(buffer.initialization_status.create_action( + buffer_id, + offset..end, + MemoryInitKind::NeedsInitializedMemory, + )); + state.vertex[slot as usize] = Some(VertexState::new(buffer_id, offset..end)); + } + RenderCommand::SetPushConstant { + stages, + offset, + size_bytes, + values_offset: _, + } => { + let scope = PassErrorScope::SetPushConstant; + let end_offset = offset + size_bytes; + + let pipeline = state.pipeline(scope)?; + let 
pipeline_layout = &pipeline_layout_guard[pipeline.layout_id]; + + pipeline_layout + .validate_push_constant_ranges(stages, offset, end_offset) + .map_pass_err(scope)?; + + commands.push(command); + } + RenderCommand::Draw { + vertex_count, + instance_count, + first_vertex, + first_instance, + } => { + let scope = PassErrorScope::Draw { + indexed: false, + indirect: false, + pipeline: state.pipeline_id(), + }; + let pipeline = state.pipeline(scope)?; + let used_bind_groups = pipeline.used_bind_groups; + let vertex_limits = state.vertex_limits(pipeline); + let last_vertex = first_vertex + vertex_count; + if last_vertex > vertex_limits.vertex_limit { + return Err(DrawError::VertexBeyondLimit { + last_vertex, + vertex_limit: vertex_limits.vertex_limit, + slot: vertex_limits.vertex_limit_slot, + }) + .map_pass_err(scope); + } + let last_instance = first_instance + instance_count; + if last_instance > vertex_limits.instance_limit { + return Err(DrawError::InstanceBeyondLimit { + last_instance, + instance_limit: vertex_limits.instance_limit, + slot: vertex_limits.instance_limit_slot, + }) + .map_pass_err(scope); + } + commands.extend(state.flush_vertices()); + commands.extend(state.flush_binds(used_bind_groups, base.dynamic_offsets)); + commands.push(command); + } + RenderCommand::DrawIndexed { + index_count, + instance_count, + first_index, + base_vertex: _, + first_instance, + } => { + let scope = PassErrorScope::Draw { + indexed: true, + indirect: false, + pipeline: state.pipeline_id(), + }; + let pipeline = state.pipeline(scope)?; + let used_bind_groups = pipeline.used_bind_groups; + let index = match state.index { + Some(ref index) => index, + None => return Err(DrawError::MissingIndexBuffer).map_pass_err(scope), + }; + //TODO: validate that base_vertex + max_index() is within the provided range + let vertex_limits = state.vertex_limits(pipeline); + let index_limit = index.limit(); + let last_index = first_index + index_count; + if last_index > index_limit { + return 
Err(DrawError::IndexBeyondLimit { + last_index, + index_limit, + }) + .map_pass_err(scope); + } + let last_instance = first_instance + instance_count; + if last_instance > vertex_limits.instance_limit { + return Err(DrawError::InstanceBeyondLimit { + last_instance, + instance_limit: vertex_limits.instance_limit, + slot: vertex_limits.instance_limit_slot, + }) + .map_pass_err(scope); + } + commands.extend(state.flush_index()); + commands.extend(state.flush_vertices()); + commands.extend(state.flush_binds(used_bind_groups, base.dynamic_offsets)); + commands.push(command); + } + RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count: None, + indexed: false, + } => { + let scope = PassErrorScope::Draw { + indexed: false, + indirect: true, + pipeline: state.pipeline_id(), + }; + device + .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION) + .map_pass_err(scope)?; + + let pipeline = state.pipeline(scope)?; + let used_bind_groups = pipeline.used_bind_groups; + + let buffer: &resource::Buffer<A> = state + .trackers + .buffers + .merge_single(&*buffer_guard, buffer_id, hal::BufferUses::INDIRECT) + .map_pass_err(scope)?; + self.check_valid_to_use(buffer.device_id.value) + .map_pass_err(scope)?; + check_buffer_usage(buffer.usage, wgt::BufferUsages::INDIRECT) + .map_pass_err(scope)?; + + buffer_memory_init_actions.extend(buffer.initialization_status.create_action( + buffer_id, + offset..(offset + mem::size_of::<wgt::DrawIndirectArgs>() as u64), + MemoryInitKind::NeedsInitializedMemory, + )); + + commands.extend(state.flush_vertices()); + commands.extend(state.flush_binds(used_bind_groups, base.dynamic_offsets)); + commands.push(command); + } + RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count: None, + indexed: true, + } => { + let scope = PassErrorScope::Draw { + indexed: true, + indirect: true, + pipeline: state.pipeline_id(), + }; + device + .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION) + .map_pass_err(scope)?; + + 
let pipeline = state.pipeline(scope)?; + let used_bind_groups = pipeline.used_bind_groups; + + let buffer: &resource::Buffer<A> = state + .trackers + .buffers + .merge_single(&*buffer_guard, buffer_id, hal::BufferUses::INDIRECT) + .map_pass_err(scope)?; + self.check_valid_to_use(buffer.device_id.value) + .map_pass_err(scope)?; + check_buffer_usage(buffer.usage, wgt::BufferUsages::INDIRECT) + .map_pass_err(scope)?; + + buffer_memory_init_actions.extend(buffer.initialization_status.create_action( + buffer_id, + offset..(offset + mem::size_of::<wgt::DrawIndirectArgs>() as u64), + MemoryInitKind::NeedsInitializedMemory, + )); + + let index = match state.index { + Some(ref mut index) => index, + None => return Err(DrawError::MissingIndexBuffer).map_pass_err(scope), + }; + + commands.extend(index.flush()); + commands.extend(state.flush_vertices()); + commands.extend(state.flush_binds(used_bind_groups, base.dynamic_offsets)); + commands.push(command); + } + RenderCommand::MultiDrawIndirect { .. } + | RenderCommand::MultiDrawIndirectCount { .. } => unimplemented!(), + RenderCommand::PushDebugGroup { color: _, len: _ } => unimplemented!(), + RenderCommand::InsertDebugMarker { color: _, len: _ } => unimplemented!(), + RenderCommand::PopDebugGroup => unimplemented!(), + RenderCommand::WriteTimestamp { .. } // Must check the WRITE_TIMESTAMP_INSIDE_PASSES feature + | RenderCommand::BeginPipelineStatisticsQuery { .. } + | RenderCommand::EndPipelineStatisticsQuery => unimplemented!(), + RenderCommand::ExecuteBundle(_) + | RenderCommand::SetBlendConstant(_) + | RenderCommand::SetStencilReference(_) + | RenderCommand::SetViewport { .. 
} + | RenderCommand::SetScissor(_) => unreachable!("not supported by a render bundle"), + } + } + + Ok(RenderBundle { + base: BasePass { + label: desc.label.as_ref().map(|cow| cow.to_string()), + commands, + dynamic_offsets: state.flat_dynamic_offsets, + string_data: Vec::new(), + push_constant_data: Vec::new(), + }, + is_depth_read_only: self.is_depth_read_only, + is_stencil_read_only: self.is_stencil_read_only, + device_id: Stored { + value: id::Valid(self.parent_id), + ref_count: device.life_guard.add_ref(), + }, + used: state.trackers, + buffer_memory_init_actions, + texture_memory_init_actions, + context: self.context, + life_guard: LifeGuard::new(desc.label.borrow_or_default()), + }) + } + + fn check_valid_to_use( + &self, + device_id: id::Valid<id::DeviceId>, + ) -> Result<(), RenderBundleErrorInner> { + if device_id.0 != self.parent_id { + return Err(RenderBundleErrorInner::NotValidToUse); + } + + Ok(()) + } + + pub fn set_index_buffer( + &mut self, + buffer_id: id::BufferId, + index_format: wgt::IndexFormat, + offset: wgt::BufferAddress, + size: Option<wgt::BufferSize>, + ) { + self.base.commands.push(RenderCommand::SetIndexBuffer { + buffer_id, + index_format, + offset, + size, + }); + } +} + +/// Error type returned from `RenderBundleEncoder::new` if the sample count is invalid. +#[derive(Clone, Debug, Error)] +pub enum CreateRenderBundleError { + #[error("invalid number of samples {0}")] + InvalidSampleCount(u32), + #[error("number of color attachments exceeds the limit")] + TooManyColorAttachments, +} + +/// Error type returned from `RenderBundleEncoder::new` if the sample count is invalid. 
+#[derive(Clone, Debug, Error)] +pub enum ExecutionError { + #[error("buffer {0:?} is destroyed")] + DestroyedBuffer(id::BufferId), + #[error("using {0} in a render bundle is not implemented")] + Unimplemented(&'static str), +} +impl PrettyError for ExecutionError { + fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { + fmt.error(self); + match *self { + Self::DestroyedBuffer(id) => { + fmt.buffer_label(&id); + } + Self::Unimplemented(_reason) => {} + }; + } +} + +pub type RenderBundleDescriptor<'a> = wgt::RenderBundleDescriptor<Label<'a>>; + +//Note: here, `RenderBundle` is just wrapping a raw stream of render commands. +// The plan is to back it by an actual Vulkan secondary buffer, D3D12 Bundle, +// or Metal indirect command buffer. +pub struct RenderBundle<A: HalApi> { + // Normalized command stream. It can be executed verbatim, + // without re-binding anything on the pipeline change. + base: BasePass<RenderCommand>, + pub(super) is_depth_read_only: bool, + pub(super) is_stencil_read_only: bool, + pub(crate) device_id: Stored<id::DeviceId>, + pub(crate) used: RenderBundleScope<A>, + pub(super) buffer_memory_init_actions: Vec<BufferInitTrackerAction>, + pub(super) texture_memory_init_actions: Vec<TextureInitTrackerAction>, + pub(super) context: RenderPassContext, + pub(crate) life_guard: LifeGuard, +} + +unsafe impl<A: HalApi> Send for RenderBundle<A> {} +unsafe impl<A: HalApi> Sync for RenderBundle<A> {} + +impl<A: HalApi> RenderBundle<A> { + /// Actually encode the contents into a native command buffer. + /// + /// This is partially duplicating the logic of `command_encoder_run_render_pass`. + /// However the point of this function is to be lighter, since we already had + /// a chance to go through the commands in `render_bundle_encoder_finish`. + /// + /// Note that the function isn't expected to fail, generally. + /// All the validation has already been done by this point. + /// The only failure condition is if some of the used buffers are destroyed. 
+ pub(super) unsafe fn execute( + &self, + raw: &mut A::CommandEncoder, + pipeline_layout_guard: &Storage< + crate::binding_model::PipelineLayout<A>, + id::PipelineLayoutId, + >, + bind_group_guard: &Storage<crate::binding_model::BindGroup<A>, id::BindGroupId>, + pipeline_guard: &Storage<crate::pipeline::RenderPipeline<A>, id::RenderPipelineId>, + buffer_guard: &Storage<crate::resource::Buffer<A>, id::BufferId>, + ) -> Result<(), ExecutionError> { + let mut offsets = self.base.dynamic_offsets.as_slice(); + let mut pipeline_layout_id = None::<id::Valid<id::PipelineLayoutId>>; + if let Some(ref label) = self.base.label { + unsafe { raw.begin_debug_marker(label) }; + } + + for command in self.base.commands.iter() { + match *command { + RenderCommand::SetBindGroup { + index, + num_dynamic_offsets, + bind_group_id, + } => { + let bind_group = bind_group_guard.get(bind_group_id).unwrap(); + unsafe { + raw.set_bind_group( + &pipeline_layout_guard[pipeline_layout_id.unwrap()].raw, + index as u32, + &bind_group.raw, + &offsets[..num_dynamic_offsets as usize], + ) + }; + offsets = &offsets[num_dynamic_offsets as usize..]; + } + RenderCommand::SetPipeline(pipeline_id) => { + let pipeline = pipeline_guard.get(pipeline_id).unwrap(); + unsafe { raw.set_render_pipeline(&pipeline.raw) }; + + pipeline_layout_id = Some(pipeline.layout_id.value); + } + RenderCommand::SetIndexBuffer { + buffer_id, + index_format, + offset, + size, + } => { + let buffer = buffer_guard + .get(buffer_id) + .unwrap() + .raw + .as_ref() + .ok_or(ExecutionError::DestroyedBuffer(buffer_id))?; + let bb = hal::BufferBinding { + buffer, + offset, + size, + }; + unsafe { raw.set_index_buffer(bb, index_format) }; + } + RenderCommand::SetVertexBuffer { + slot, + buffer_id, + offset, + size, + } => { + let buffer = buffer_guard + .get(buffer_id) + .unwrap() + .raw + .as_ref() + .ok_or(ExecutionError::DestroyedBuffer(buffer_id))?; + let bb = hal::BufferBinding { + buffer, + offset, + size, + }; + unsafe { 
raw.set_vertex_buffer(slot, bb) }; + } + RenderCommand::SetPushConstant { + stages, + offset, + size_bytes, + values_offset, + } => { + let pipeline_layout_id = pipeline_layout_id.unwrap(); + let pipeline_layout = &pipeline_layout_guard[pipeline_layout_id]; + + if let Some(values_offset) = values_offset { + let values_end_offset = + (values_offset + size_bytes / wgt::PUSH_CONSTANT_ALIGNMENT) as usize; + let data_slice = &self.base.push_constant_data + [(values_offset as usize)..values_end_offset]; + + unsafe { + raw.set_push_constants(&pipeline_layout.raw, stages, offset, data_slice) + } + } else { + super::push_constant_clear( + offset, + size_bytes, + |clear_offset, clear_data| { + unsafe { + raw.set_push_constants( + &pipeline_layout.raw, + stages, + clear_offset, + clear_data, + ) + }; + }, + ); + } + } + RenderCommand::Draw { + vertex_count, + instance_count, + first_vertex, + first_instance, + } => { + unsafe { raw.draw(first_vertex, vertex_count, first_instance, instance_count) }; + } + RenderCommand::DrawIndexed { + index_count, + instance_count, + first_index, + base_vertex, + first_instance, + } => { + unsafe { + raw.draw_indexed( + first_index, + index_count, + base_vertex, + first_instance, + instance_count, + ) + }; + } + RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count: None, + indexed: false, + } => { + let buffer = buffer_guard + .get(buffer_id) + .unwrap() + .raw + .as_ref() + .ok_or(ExecutionError::DestroyedBuffer(buffer_id))?; + unsafe { raw.draw_indirect(buffer, offset, 1) }; + } + RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count: None, + indexed: true, + } => { + let buffer = buffer_guard + .get(buffer_id) + .unwrap() + .raw + .as_ref() + .ok_or(ExecutionError::DestroyedBuffer(buffer_id))?; + unsafe { raw.draw_indexed_indirect(buffer, offset, 1) }; + } + RenderCommand::MultiDrawIndirect { .. } + | RenderCommand::MultiDrawIndirectCount { .. 
} => { + return Err(ExecutionError::Unimplemented("multi-draw-indirect")) + } + RenderCommand::PushDebugGroup { .. } + | RenderCommand::InsertDebugMarker { .. } + | RenderCommand::PopDebugGroup => { + return Err(ExecutionError::Unimplemented("debug-markers")) + } + RenderCommand::WriteTimestamp { .. } + | RenderCommand::BeginPipelineStatisticsQuery { .. } + | RenderCommand::EndPipelineStatisticsQuery => { + return Err(ExecutionError::Unimplemented("queries")) + } + RenderCommand::ExecuteBundle(_) + | RenderCommand::SetBlendConstant(_) + | RenderCommand::SetStencilReference(_) + | RenderCommand::SetViewport { .. } + | RenderCommand::SetScissor(_) => unreachable!(), + } + } + + if let Some(_) = self.base.label { + unsafe { raw.end_debug_marker() }; + } + + Ok(()) + } +} + +impl<A: HalApi> Resource for RenderBundle<A> { + const TYPE: &'static str = "RenderBundle"; + + fn life_guard(&self) -> &LifeGuard { + &self.life_guard + } +} + +/// A render bundle's current index buffer state. +/// +/// [`RenderBundleEncoder::finish`] records the currently set index buffer here, +/// and calls [`State::flush_index`] before any indexed draw command to produce +/// a `SetIndexBuffer` command if one is necessary. +#[derive(Debug)] +struct IndexState { + buffer: id::BufferId, + format: wgt::IndexFormat, + range: Range<wgt::BufferAddress>, + is_dirty: bool, +} + +impl IndexState { + /// Return the number of entries in the current index buffer. + /// + /// Panic if no index buffer has been set. + fn limit(&self) -> u32 { + let bytes_per_index = match self.format { + wgt::IndexFormat::Uint16 => 2, + wgt::IndexFormat::Uint32 => 4, + }; + ((self.range.end - self.range.start) / bytes_per_index) as u32 + } + + /// Generate a `SetIndexBuffer` command to prepare for an indexed draw + /// command, if needed. 
+ fn flush(&mut self) -> Option<RenderCommand> { + if self.is_dirty { + self.is_dirty = false; + Some(RenderCommand::SetIndexBuffer { + buffer_id: self.buffer, + index_format: self.format, + offset: self.range.start, + size: wgt::BufferSize::new(self.range.end - self.range.start), + }) + } else { + None + } + } +} + +/// The state of a single vertex buffer slot during render bundle encoding. +/// +/// [`RenderBundleEncoder::finish`] uses this to drop redundant +/// `SetVertexBuffer` commands from the final [`RenderBundle`]. It +/// records one vertex buffer slot's state changes here, and then +/// calls this type's [`flush`] method just before any draw command to +/// produce a `SetVertexBuffer` commands if one is necessary. +/// +/// [`flush`]: IndexState::flush +#[derive(Debug)] +struct VertexState { + buffer: id::BufferId, + range: Range<wgt::BufferAddress>, + is_dirty: bool, +} + +impl VertexState { + fn new(buffer: id::BufferId, range: Range<wgt::BufferAddress>) -> Self { + Self { + buffer, + range, + is_dirty: true, + } + } + + /// Generate a `SetVertexBuffer` command for this slot, if necessary. + /// + /// `slot` is the index of the vertex buffer slot that `self` tracks. + fn flush(&mut self, slot: u32) -> Option<RenderCommand> { + if self.is_dirty { + self.is_dirty = false; + Some(RenderCommand::SetVertexBuffer { + slot, + buffer_id: self.buffer, + offset: self.range.start, + size: wgt::BufferSize::new(self.range.end - self.range.start), + }) + } else { + None + } + } +} + +/// A bind group that has been set at a particular index during render bundle encoding. +#[derive(Debug)] +struct BindState { + /// The id of the bind group set at this index. + bind_group_id: id::BindGroupId, + + /// The layout of `group`. + layout_id: id::Valid<id::BindGroupLayoutId>, + + /// The range of dynamic offsets for this bind group, in the original + /// command stream's `BassPass::dynamic_offsets` array. 
+ dynamic_offsets: Range<usize>, + + /// True if this index's contents have been changed since the last time we + /// generated a `SetBindGroup` command. + is_dirty: bool, +} + +#[derive(Debug)] +struct VertexLimitState { + /// Length of the shortest vertex rate vertex buffer + vertex_limit: u32, + /// Buffer slot which the shortest vertex rate vertex buffer is bound to + vertex_limit_slot: u32, + /// Length of the shortest instance rate vertex buffer + instance_limit: u32, + /// Buffer slot which the shortest instance rate vertex buffer is bound to + instance_limit_slot: u32, +} + +/// The bundle's current pipeline, and some cached information needed for validation. +struct PipelineState { + /// The pipeline's id. + id: id::RenderPipelineId, + + /// The id of the pipeline's layout. + layout_id: id::Valid<id::PipelineLayoutId>, + + /// How this pipeline's vertex shader traverses each vertex buffer, indexed + /// by vertex buffer slot number. + steps: Vec<pipeline::VertexStep>, + + /// Ranges of push constants this pipeline uses, copied from the pipeline + /// layout. + push_constant_ranges: ArrayVec<wgt::PushConstantRange, { SHADER_STAGE_COUNT }>, + + /// The number of bind groups this pipeline uses. + used_bind_groups: usize, +} + +impl PipelineState { + fn new<A: HalApi>( + pipeline_id: id::RenderPipelineId, + pipeline: &pipeline::RenderPipeline<A>, + layout: &binding_model::PipelineLayout<A>, + ) -> Self { + Self { + id: pipeline_id, + layout_id: pipeline.layout_id.value, + steps: pipeline.vertex_steps.to_vec(), + push_constant_ranges: layout.push_constant_ranges.iter().cloned().collect(), + used_bind_groups: layout.bind_group_layout_ids.len(), + } + } + + /// Return a sequence of commands to zero the push constant ranges this + /// pipeline uses. If no initialization is necessary, return `None`. 
+ fn zero_push_constants(&self) -> Option<impl Iterator<Item = RenderCommand>> { + if !self.push_constant_ranges.is_empty() { + let nonoverlapping_ranges = + super::bind::compute_nonoverlapping_ranges(&self.push_constant_ranges); + + Some( + nonoverlapping_ranges + .into_iter() + .map(|range| RenderCommand::SetPushConstant { + stages: range.stages, + offset: range.range.start, + size_bytes: range.range.end - range.range.start, + values_offset: None, // write zeros + }), + ) + } else { + None + } + } +} + +/// State for analyzing and cleaning up bundle command streams. +/// +/// To minimize state updates, [`RenderBundleEncoder::finish`] +/// actually just applies commands like [`SetBindGroup`] and +/// [`SetIndexBuffer`] to the simulated state stored here, and then +/// calls the `flush_foo` methods before draw calls to produce the +/// update commands we actually need. +/// +/// [`SetBindGroup`]: RenderCommand::SetBindGroup +/// [`SetIndexBuffer`]: RenderCommand::SetIndexBuffer +struct State<A: HalApi> { + /// Resources used by this bundle. This will become [`RenderBundle::used`]. + trackers: RenderBundleScope<A>, + + /// The currently set pipeline, if any. + pipeline: Option<PipelineState>, + + /// The bind group set at each index, if any. + bind: ArrayVec<Option<BindState>, { hal::MAX_BIND_GROUPS }>, + + /// The state of each vertex buffer slot. + vertex: ArrayVec<Option<VertexState>, { hal::MAX_VERTEX_BUFFERS }>, + + /// The current index buffer, if one has been set. We flush this state + /// before indexed draw commands. + index: Option<IndexState>, + + /// Dynamic offset values used by the cleaned-up command sequence. + /// + /// This becomes the final [`RenderBundle`]'s [`BasePass`]'s + /// [`dynamic_offsets`] list. 
+ /// + /// [`dynamic_offsets`]: BasePass::dynamic_offsets + flat_dynamic_offsets: Vec<wgt::DynamicOffset>, +} + +impl<A: HalApi> State<A> { + fn vertex_limits(&self, pipeline: &PipelineState) -> VertexLimitState { + let mut vert_state = VertexLimitState { + vertex_limit: u32::MAX, + vertex_limit_slot: 0, + instance_limit: u32::MAX, + instance_limit_slot: 0, + }; + for (idx, (vbs, step)) in self.vertex.iter().zip(&pipeline.steps).enumerate() { + if let Some(ref vbs) = *vbs { + let limit = ((vbs.range.end - vbs.range.start) / step.stride) as u32; + match step.mode { + wgt::VertexStepMode::Vertex => { + if limit < vert_state.vertex_limit { + vert_state.vertex_limit = limit; + vert_state.vertex_limit_slot = idx as _; + } + } + wgt::VertexStepMode::Instance => { + if limit < vert_state.instance_limit { + vert_state.instance_limit = limit; + vert_state.instance_limit_slot = idx as _; + } + } + } + } + } + vert_state + } + + /// Return the id of the current pipeline, if any. + fn pipeline_id(&self) -> Option<id::RenderPipelineId> { + self.pipeline.as_ref().map(|p| p.id) + } + + /// Return the current pipeline state. Return an error if none is set. + fn pipeline(&self, scope: PassErrorScope) -> Result<&PipelineState, RenderBundleError> { + self.pipeline + .as_ref() + .ok_or(DrawError::MissingPipeline) + .map_pass_err(scope) + } + + /// Mark all non-empty bind group table entries from `index` onwards as dirty. + fn invalidate_bind_group_from(&mut self, index: usize) { + for contents in self.bind[index..].iter_mut().flatten() { + contents.is_dirty = true; + } + } + + fn set_bind_group( + &mut self, + slot: u8, + bind_group_id: id::BindGroupId, + layout_id: id::Valid<id::BindGroupLayoutId>, + dynamic_offsets: Range<usize>, + ) { + // If this call wouldn't actually change this index's state, we can + // return early. (If there are dynamic offsets, the range will always + // be different.) 
+ if dynamic_offsets.is_empty() { + if let Some(ref contents) = self.bind[slot as usize] { + if contents.bind_group_id == bind_group_id { + return; + } + } + } + + // Record the index's new state. + self.bind[slot as usize] = Some(BindState { + bind_group_id, + layout_id, + dynamic_offsets, + is_dirty: true, + }); + + // Once we've changed the bind group at a particular index, all + // subsequent indices need to be rewritten. + self.invalidate_bind_group_from(slot as usize + 1); + } + + /// Determine which bind group slots need to be re-set after a pipeline change. + /// + /// Given that we are switching from the current pipeline state to `new`, + /// whose layout is `layout`, mark all the bind group slots that we need to + /// emit new `SetBindGroup` commands for as dirty. + /// + /// According to `wgpu_hal`'s rules: + /// + /// - If the layout of any bind group slot changes, then that slot and + /// all following slots must have their bind groups re-established. + /// + /// - Changing the push constant ranges at all requires re-establishing + /// all bind groups. + fn invalidate_bind_groups( + &mut self, + new: &PipelineState, + layout: &binding_model::PipelineLayout<A>, + ) { + match self.pipeline { + None => { + // Establishing entirely new pipeline state. + self.invalidate_bind_group_from(0); + } + Some(ref old) => { + if old.id == new.id { + // Everything is derived from the pipeline, so if the id has + // not changed, there's no need to consider anything else. + return; + } + + // Any push constant change invalidates all groups. 
+ if old.push_constant_ranges != new.push_constant_ranges { + self.invalidate_bind_group_from(0); + } else { + let first_changed = self + .bind + .iter() + .zip(&layout.bind_group_layout_ids) + .position(|(entry, &layout_id)| match *entry { + Some(ref contents) => contents.layout_id != layout_id, + None => false, + }); + if let Some(slot) = first_changed { + self.invalidate_bind_group_from(slot); + } + } + } + } + } + + /// Set the bundle's current index buffer and its associated parameters. + fn set_index_buffer( + &mut self, + buffer: id::BufferId, + format: wgt::IndexFormat, + range: Range<wgt::BufferAddress>, + ) { + match self.index { + Some(ref current) + if current.buffer == buffer + && current.format == format + && current.range == range => + { + return + } + _ => (), + } + + self.index = Some(IndexState { + buffer, + format, + range, + is_dirty: true, + }); + } + + /// Generate a `SetIndexBuffer` command to prepare for an indexed draw + /// command, if needed. + fn flush_index(&mut self) -> Option<RenderCommand> { + self.index.as_mut().and_then(|index| index.flush()) + } + + fn flush_vertices(&mut self) -> impl Iterator<Item = RenderCommand> + '_ { + self.vertex + .iter_mut() + .enumerate() + .flat_map(|(i, vs)| vs.as_mut().and_then(|vs| vs.flush(i as u32))) + } + + /// Generate `SetBindGroup` commands for any bind groups that need to be updated. + fn flush_binds( + &mut self, + used_bind_groups: usize, + dynamic_offsets: &[wgt::DynamicOffset], + ) -> impl Iterator<Item = RenderCommand> + '_ { + // Append each dirty bind group's dynamic offsets to `flat_dynamic_offsets`. + for contents in self.bind[..used_bind_groups].iter().flatten() { + if contents.is_dirty { + self.flat_dynamic_offsets + .extend_from_slice(&dynamic_offsets[contents.dynamic_offsets.clone()]); + } + } + + // Then, generate `SetBindGroup` commands to update the dirty bind + // groups. After this, all bind groups are clean. 
+ self.bind[..used_bind_groups] + .iter_mut() + .enumerate() + .flat_map(|(i, entry)| { + if let Some(ref mut contents) = *entry { + if contents.is_dirty { + contents.is_dirty = false; + let offsets = &contents.dynamic_offsets; + return Some(RenderCommand::SetBindGroup { + index: i as u8, + bind_group_id: contents.bind_group_id, + num_dynamic_offsets: (offsets.end - offsets.start) as u8, + }); + } + } + None + }) + } +} + +/// Error encountered when finishing recording a render bundle. +#[derive(Clone, Debug, Error)] +pub(super) enum RenderBundleErrorInner { + #[error("resource is not valid to use with this render bundle because the resource and the bundle come from different devices")] + NotValidToUse, + #[error(transparent)] + Device(#[from] DeviceError), + #[error(transparent)] + RenderCommand(RenderCommandError), + #[error(transparent)] + Draw(#[from] DrawError), + #[error(transparent)] + MissingDownlevelFlags(#[from] MissingDownlevelFlags), +} + +impl<T> From<T> for RenderBundleErrorInner +where + T: Into<RenderCommandError>, +{ + fn from(t: T) -> Self { + Self::RenderCommand(t.into()) + } +} + +/// Error encountered when finishing recording a render bundle. 
+#[derive(Clone, Debug, Error)] +#[error("{scope}")] +pub struct RenderBundleError { + pub scope: PassErrorScope, + #[source] + inner: RenderBundleErrorInner, +} + +impl RenderBundleError { + pub(crate) const INVALID_DEVICE: Self = RenderBundleError { + scope: PassErrorScope::Bundle, + inner: RenderBundleErrorInner::Device(DeviceError::Invalid), + }; +} +impl PrettyError for RenderBundleError { + fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { + // This error is wrapper for the inner error, + // but the scope has useful labels + fmt.error(self); + self.scope.fmt_pretty(fmt); + } +} + +impl<T, E> MapPassErr<T, RenderBundleError> for Result<T, E> +where + E: Into<RenderBundleErrorInner>, +{ + fn map_pass_err(self, scope: PassErrorScope) -> Result<T, RenderBundleError> { + self.map_err(|inner| RenderBundleError { + scope, + inner: inner.into(), + }) + } +} + +pub mod bundle_ffi { + use super::{RenderBundleEncoder, RenderCommand}; + use crate::{id, RawString}; + use std::{convert::TryInto, slice}; + use wgt::{BufferAddress, BufferSize, DynamicOffset, IndexFormat}; + + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given pointer is + /// valid for `offset_length` elements. 
+ #[no_mangle] + pub unsafe extern "C" fn wgpu_render_bundle_set_bind_group( + bundle: &mut RenderBundleEncoder, + index: u32, + bind_group_id: id::BindGroupId, + offsets: *const DynamicOffset, + offset_length: usize, + ) { + let redundant = unsafe { + bundle.current_bind_groups.set_and_check_redundant( + bind_group_id, + index, + &mut bundle.base.dynamic_offsets, + offsets, + offset_length, + ) + }; + + if redundant { + return; + } + + bundle.base.commands.push(RenderCommand::SetBindGroup { + index: index.try_into().unwrap(), + num_dynamic_offsets: offset_length.try_into().unwrap(), + bind_group_id, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_bundle_set_pipeline( + bundle: &mut RenderBundleEncoder, + pipeline_id: id::RenderPipelineId, + ) { + if bundle.current_pipeline.set_and_check_redundant(pipeline_id) { + return; + } + + bundle + .base + .commands + .push(RenderCommand::SetPipeline(pipeline_id)); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_bundle_set_vertex_buffer( + bundle: &mut RenderBundleEncoder, + slot: u32, + buffer_id: id::BufferId, + offset: BufferAddress, + size: Option<BufferSize>, + ) { + bundle.base.commands.push(RenderCommand::SetVertexBuffer { + slot, + buffer_id, + offset, + size, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_bundle_set_index_buffer( + encoder: &mut RenderBundleEncoder, + buffer: id::BufferId, + index_format: IndexFormat, + offset: BufferAddress, + size: Option<BufferSize>, + ) { + encoder.set_index_buffer(buffer, index_format, offset, size); + } + + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given pointer is + /// valid for `data` elements. 
+ #[no_mangle] + pub unsafe extern "C" fn wgpu_render_bundle_set_push_constants( + pass: &mut RenderBundleEncoder, + stages: wgt::ShaderStages, + offset: u32, + size_bytes: u32, + data: *const u8, + ) { + assert_eq!( + offset & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), + 0, + "Push constant offset must be aligned to 4 bytes." + ); + assert_eq!( + size_bytes & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), + 0, + "Push constant size must be aligned to 4 bytes." + ); + let data_slice = unsafe { slice::from_raw_parts(data, size_bytes as usize) }; + let value_offset = pass.base.push_constant_data.len().try_into().expect( + "Ran out of push constant space. Don't set 4gb of push constants per RenderBundle.", + ); + + pass.base.push_constant_data.extend( + data_slice + .chunks_exact(wgt::PUSH_CONSTANT_ALIGNMENT as usize) + .map(|arr| u32::from_ne_bytes([arr[0], arr[1], arr[2], arr[3]])), + ); + + pass.base.commands.push(RenderCommand::SetPushConstant { + stages, + offset, + size_bytes, + values_offset: Some(value_offset), + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_bundle_draw( + bundle: &mut RenderBundleEncoder, + vertex_count: u32, + instance_count: u32, + first_vertex: u32, + first_instance: u32, + ) { + bundle.base.commands.push(RenderCommand::Draw { + vertex_count, + instance_count, + first_vertex, + first_instance, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_bundle_draw_indexed( + bundle: &mut RenderBundleEncoder, + index_count: u32, + instance_count: u32, + first_index: u32, + base_vertex: i32, + first_instance: u32, + ) { + bundle.base.commands.push(RenderCommand::DrawIndexed { + index_count, + instance_count, + first_index, + base_vertex, + first_instance, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_bundle_draw_indirect( + bundle: &mut RenderBundleEncoder, + buffer_id: id::BufferId, + offset: BufferAddress, + ) { + bundle.base.commands.push(RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count: None, + indexed: false, + }); 
+ } + + #[no_mangle] + pub extern "C" fn wgpu_render_bundle_draw_indexed_indirect( + bundle: &mut RenderBundleEncoder, + buffer_id: id::BufferId, + offset: BufferAddress, + ) { + bundle.base.commands.push(RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count: None, + indexed: true, + }); + } + + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given `label` + /// is a valid null-terminated string. + #[no_mangle] + pub unsafe extern "C" fn wgpu_render_bundle_push_debug_group( + _bundle: &mut RenderBundleEncoder, + _label: RawString, + ) { + //TODO + } + + #[no_mangle] + pub extern "C" fn wgpu_render_bundle_pop_debug_group(_bundle: &mut RenderBundleEncoder) { + //TODO + } + + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given `label` + /// is a valid null-terminated string. + #[no_mangle] + pub unsafe extern "C" fn wgpu_render_bundle_insert_debug_marker( + _bundle: &mut RenderBundleEncoder, + _label: RawString, + ) { + //TODO + } +} diff --git a/third_party/rust/wgpu-core/src/command/clear.rs b/third_party/rust/wgpu-core/src/command/clear.rs new file mode 100644 index 0000000000..aa9d60638e --- /dev/null +++ b/third_party/rust/wgpu-core/src/command/clear.rs @@ -0,0 +1,462 @@ +use std::{num::NonZeroU32, ops::Range}; + +#[cfg(feature = "trace")] +use crate::device::trace::Command as TraceCommand; +use crate::{ + command::CommandBuffer, + get_lowest_common_denom, + hub::{self, Global, GlobalIdentityHandlerFactory, HalApi, Token}, + id::{BufferId, CommandEncoderId, DeviceId, TextureId, Valid}, + init_tracker::{MemoryInitKind, TextureInitRange}, + resource::{Texture, TextureClearMode}, + track::{TextureSelector, TextureTracker}, +}; + +use hal::{auxil::align_to, CommandEncoder as _}; +use thiserror::Error; +use wgt::{BufferAddress, BufferSize, BufferUsages, ImageSubresourceRange, TextureAspect}; + +/// Error encountered while attempting a clear. 
+#[derive(Clone, Debug, Error)] +pub enum ClearError { + #[error("to use clear_texture the CLEAR_TEXTURE feature needs to be enabled")] + MissingClearTextureFeature, + #[error("command encoder {0:?} is invalid")] + InvalidCommandEncoder(CommandEncoderId), + #[error("device {0:?} is invalid")] + InvalidDevice(DeviceId), + #[error("buffer {0:?} is invalid or destroyed")] + InvalidBuffer(BufferId), + #[error("texture {0:?} is invalid or destroyed")] + InvalidTexture(TextureId), + #[error("texture {0:?} can not be cleared")] + NoValidTextureClearMode(TextureId), + #[error("buffer clear size {0:?} is not a multiple of `COPY_BUFFER_ALIGNMENT`")] + UnalignedFillSize(BufferSize), + #[error("buffer offset {0:?} is not a multiple of `COPY_BUFFER_ALIGNMENT`")] + UnalignedBufferOffset(BufferAddress), + #[error("clear of {start_offset}..{end_offset} would end up overrunning the bounds of the buffer of size {buffer_size}")] + BufferOverrun { + start_offset: BufferAddress, + end_offset: BufferAddress, + buffer_size: BufferAddress, + }, + #[error("destination buffer is missing the `COPY_DST` usage flag")] + MissingCopyDstUsageFlag(Option<BufferId>, Option<TextureId>), + #[error("texture lacks the aspects that were specified in the image subresource range. Texture with format {texture_format:?}, specified was {subresource_range_aspects:?}")] + MissingTextureAspect { + texture_format: wgt::TextureFormat, + subresource_range_aspects: TextureAspect, + }, + #[error("image subresource level range is outside of the texture's level range. texture range is {texture_level_range:?}, \ +whereas subesource range specified start {subresource_base_mip_level} and count {subresource_mip_level_count:?}")] + InvalidTextureLevelRange { + texture_level_range: Range<u32>, + subresource_base_mip_level: u32, + subresource_mip_level_count: Option<NonZeroU32>, + }, + #[error("image subresource layer range is outside of the texture's layer range. 
texture range is {texture_layer_range:?}, \ +whereas subesource range specified start {subresource_base_array_layer} and count {subresource_array_layer_count:?}")] + InvalidTextureLayerRange { + texture_layer_range: Range<u32>, + subresource_base_array_layer: u32, + subresource_array_layer_count: Option<NonZeroU32>, + }, +} + +impl<G: GlobalIdentityHandlerFactory> Global<G> { + pub fn command_encoder_clear_buffer<A: HalApi>( + &self, + command_encoder_id: CommandEncoderId, + dst: BufferId, + offset: BufferAddress, + size: Option<BufferSize>, + ) -> Result<(), ClearError> { + profiling::scope!("CommandEncoder::fill_buffer"); + + let hub = A::hub(self); + let mut token = Token::root(); + let (mut cmd_buf_guard, mut token) = hub.command_buffers.write(&mut token); + let cmd_buf = CommandBuffer::get_encoder_mut(&mut *cmd_buf_guard, command_encoder_id) + .map_err(|_| ClearError::InvalidCommandEncoder(command_encoder_id))?; + let (buffer_guard, _) = hub.buffers.read(&mut token); + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf.commands { + list.push(TraceCommand::ClearBuffer { dst, offset, size }); + } + + let (dst_buffer, dst_pending) = cmd_buf + .trackers + .buffers + .set_single(&*buffer_guard, dst, hal::BufferUses::COPY_DST) + .ok_or(ClearError::InvalidBuffer(dst))?; + let dst_raw = dst_buffer + .raw + .as_ref() + .ok_or(ClearError::InvalidBuffer(dst))?; + if !dst_buffer.usage.contains(BufferUsages::COPY_DST) { + return Err(ClearError::MissingCopyDstUsageFlag(Some(dst), None)); + } + + // Check if offset & size are valid. 
+ if offset % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(ClearError::UnalignedBufferOffset(offset)); + } + if let Some(size) = size { + if size.get() % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(ClearError::UnalignedFillSize(size)); + } + let destination_end_offset = offset + size.get(); + if destination_end_offset > dst_buffer.size { + return Err(ClearError::BufferOverrun { + start_offset: offset, + end_offset: destination_end_offset, + buffer_size: dst_buffer.size, + }); + } + } + + let end = match size { + Some(size) => offset + size.get(), + None => dst_buffer.size, + }; + if offset == end { + log::trace!("Ignoring fill_buffer of size 0"); + return Ok(()); + } + + // Mark dest as initialized. + cmd_buf + .buffer_memory_init_actions + .extend(dst_buffer.initialization_status.create_action( + dst, + offset..end, + MemoryInitKind::ImplicitlyInitialized, + )); + // actual hal barrier & operation + let dst_barrier = dst_pending.map(|pending| pending.into_hal(dst_buffer)); + let cmd_buf_raw = cmd_buf.encoder.open(); + unsafe { + cmd_buf_raw.transition_buffers(dst_barrier.into_iter()); + cmd_buf_raw.clear_buffer(dst_raw, offset..end); + } + Ok(()) + } + + pub fn command_encoder_clear_texture<A: HalApi>( + &self, + command_encoder_id: CommandEncoderId, + dst: TextureId, + subresource_range: &ImageSubresourceRange, + ) -> Result<(), ClearError> { + profiling::scope!("CommandEncoder::clear_texture"); + + let hub = A::hub(self); + let mut token = Token::root(); + let (device_guard, mut token) = hub.devices.write(&mut token); + let (mut cmd_buf_guard, mut token) = hub.command_buffers.write(&mut token); + let cmd_buf = CommandBuffer::get_encoder_mut(&mut *cmd_buf_guard, command_encoder_id) + .map_err(|_| ClearError::InvalidCommandEncoder(command_encoder_id))?; + let (_, mut token) = hub.buffers.read(&mut token); // skip token + let (texture_guard, _) = hub.textures.read(&mut token); + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf.commands { + 
list.push(TraceCommand::ClearTexture { + dst, + subresource_range: subresource_range.clone(), + }); + } + + if !cmd_buf.support_clear_texture { + return Err(ClearError::MissingClearTextureFeature); + } + + let dst_texture = texture_guard + .get(dst) + .map_err(|_| ClearError::InvalidTexture(dst))?; + + // Check if subresource aspects are valid. + let requested_aspects = hal::FormatAspects::from(subresource_range.aspect); + let clear_aspects = hal::FormatAspects::from(dst_texture.desc.format) & requested_aspects; + if clear_aspects.is_empty() { + return Err(ClearError::MissingTextureAspect { + texture_format: dst_texture.desc.format, + subresource_range_aspects: subresource_range.aspect, + }); + }; + + // Check if subresource level range is valid + let subresource_level_end = match subresource_range.mip_level_count { + Some(count) => subresource_range.base_mip_level + count.get(), + None => dst_texture.full_range.mips.end, + }; + if dst_texture.full_range.mips.start > subresource_range.base_mip_level + || dst_texture.full_range.mips.end < subresource_level_end + { + return Err(ClearError::InvalidTextureLevelRange { + texture_level_range: dst_texture.full_range.mips.clone(), + subresource_base_mip_level: subresource_range.base_mip_level, + subresource_mip_level_count: subresource_range.mip_level_count, + }); + } + // Check if subresource layer range is valid + let subresource_layer_end = match subresource_range.array_layer_count { + Some(count) => subresource_range.base_array_layer + count.get(), + None => dst_texture.full_range.layers.end, + }; + if dst_texture.full_range.layers.start > subresource_range.base_array_layer + || dst_texture.full_range.layers.end < subresource_layer_end + { + return Err(ClearError::InvalidTextureLayerRange { + texture_layer_range: dst_texture.full_range.layers.clone(), + subresource_base_array_layer: subresource_range.base_array_layer, + subresource_array_layer_count: subresource_range.array_layer_count, + }); + } + + let device = 
&device_guard[cmd_buf.device_id.value]; + + clear_texture( + &*texture_guard, + Valid(dst), + TextureInitRange { + mip_range: subresource_range.base_mip_level..subresource_level_end, + layer_range: subresource_range.base_array_layer..subresource_layer_end, + }, + cmd_buf.encoder.open(), + &mut cmd_buf.trackers.textures, + &device.alignments, + &device.zero_buffer, + ) + } +} + +pub(crate) fn clear_texture<A: HalApi>( + storage: &hub::Storage<Texture<A>, TextureId>, + dst_texture_id: Valid<TextureId>, + range: TextureInitRange, + encoder: &mut A::CommandEncoder, + texture_tracker: &mut TextureTracker<A>, + alignments: &hal::Alignments, + zero_buffer: &A::Buffer, +) -> Result<(), ClearError> { + let dst_texture = &storage[dst_texture_id]; + + let dst_raw = dst_texture + .inner + .as_raw() + .ok_or(ClearError::InvalidTexture(dst_texture_id.0))?; + + // Issue the right barrier. + let clear_usage = match dst_texture.clear_mode { + TextureClearMode::BufferCopy => hal::TextureUses::COPY_DST, + TextureClearMode::RenderPass { + is_color: false, .. + } => hal::TextureUses::DEPTH_STENCIL_WRITE, + TextureClearMode::RenderPass { is_color: true, .. } => hal::TextureUses::COLOR_TARGET, + TextureClearMode::None => { + return Err(ClearError::NoValidTextureClearMode(dst_texture_id.0)); + } + }; + + let selector = TextureSelector { + mips: range.mip_range.clone(), + layers: range.layer_range.clone(), + }; + + // If we're in a texture-init usecase, we know that the texture is already + // tracked since whatever caused the init requirement, will have caused the + // usage tracker to be aware of the texture. Meaning, that it is safe to + // call call change_replace_tracked if the life_guard is already gone (i.e. + // the user no longer holds on to this texture). + // + // On the other hand, when coming via command_encoder_clear_texture, the + // life_guard is still there since in order to call it a texture object is + // needed. 
+ // + // We could in theory distinguish these two scenarios in the internal + // clear_texture api in order to remove this check and call the cheaper + // change_replace_tracked whenever possible. + let dst_barrier = texture_tracker + .set_single(dst_texture, dst_texture_id.0, selector, clear_usage) + .unwrap() + .map(|pending| pending.into_hal(dst_texture)); + unsafe { + encoder.transition_textures(dst_barrier.into_iter()); + } + + // Record actual clearing + match dst_texture.clear_mode { + TextureClearMode::BufferCopy => clear_texture_via_buffer_copies::<A>( + &dst_texture.desc, + alignments, + zero_buffer, + range, + encoder, + dst_raw, + ), + TextureClearMode::RenderPass { is_color, .. } => { + clear_texture_via_render_passes(dst_texture, range, is_color, encoder)? + } + TextureClearMode::None => { + return Err(ClearError::NoValidTextureClearMode(dst_texture_id.0)); + } + } + Ok(()) +} + +fn clear_texture_via_buffer_copies<A: hal::Api>( + texture_desc: &wgt::TextureDescriptor<()>, + alignments: &hal::Alignments, + zero_buffer: &A::Buffer, // Buffer of size device::ZERO_BUFFER_SIZE + range: TextureInitRange, + encoder: &mut A::CommandEncoder, + dst_raw: &A::Texture, +) { + // Gather list of zero_buffer copies and issue a single command then to perform them + let mut zero_buffer_copy_regions = Vec::new(); + let buffer_copy_pitch = alignments.buffer_copy_pitch.get() as u32; + let format_desc = texture_desc.format.describe(); + + let bytes_per_row_alignment = + get_lowest_common_denom(buffer_copy_pitch, format_desc.block_size as u32); + + for mip_level in range.mip_range { + let mut mip_size = texture_desc.mip_level_size(mip_level).unwrap(); + // Round to multiple of block size + mip_size.width = align_to(mip_size.width, format_desc.block_dimensions.0 as u32); + mip_size.height = align_to(mip_size.height, format_desc.block_dimensions.1 as u32); + + let bytes_per_row = align_to( + mip_size.width / format_desc.block_dimensions.0 as u32 * format_desc.block_size as 
u32, + bytes_per_row_alignment, + ); + + let max_rows_per_copy = crate::device::ZERO_BUFFER_SIZE as u32 / bytes_per_row; + // round down to a multiple of rows needed by the texture format + let max_rows_per_copy = max_rows_per_copy / format_desc.block_dimensions.1 as u32 + * format_desc.block_dimensions.1 as u32; + assert!( + max_rows_per_copy > 0, + "Zero buffer size is too small to fill a single row \ + of a texture with format {:?} and desc {:?}", + texture_desc.format, + texture_desc.size + ); + + let z_range = 0..(if texture_desc.dimension == wgt::TextureDimension::D3 { + mip_size.depth_or_array_layers + } else { + 1 + }); + + for array_layer in range.layer_range.clone() { + // TODO: Only doing one layer at a time for volume textures right now. + for z in z_range.clone() { + // May need multiple copies for each subresource! However, we + // assume that we never need to split a row. + let mut num_rows_left = mip_size.height; + while num_rows_left > 0 { + let num_rows = num_rows_left.min(max_rows_per_copy); + + zero_buffer_copy_regions.push(hal::BufferTextureCopy { + buffer_layout: wgt::ImageDataLayout { + offset: 0, + bytes_per_row: NonZeroU32::new(bytes_per_row), + rows_per_image: None, + }, + texture_base: hal::TextureCopyBase { + mip_level, + array_layer, + origin: wgt::Origin3d { + x: 0, // Always full rows + y: mip_size.height - num_rows_left, + z, + }, + aspect: hal::FormatAspects::all(), + }, + size: hal::CopyExtent { + width: mip_size.width, // full row + height: num_rows, + depth: 1, // Only single slice of volume texture at a time right now + }, + }); + + num_rows_left -= num_rows; + } + } + } + } + + unsafe { + encoder.copy_buffer_to_texture(zero_buffer, dst_raw, zero_buffer_copy_regions.into_iter()); + } +} + +fn clear_texture_via_render_passes<A: hal::Api>( + dst_texture: &Texture<A>, + range: TextureInitRange, + is_color: bool, + encoder: &mut A::CommandEncoder, +) -> Result<(), ClearError> { + let extent_base = wgt::Extent3d { + width: 
dst_texture.desc.size.width, + height: dst_texture.desc.size.height, + depth_or_array_layers: 1, // Only one layer or slice is cleared at a time. + }; + + let sample_count = dst_texture.desc.sample_count; + let is_3d_texture = dst_texture.desc.dimension == wgt::TextureDimension::D3; + for mip_level in range.mip_range { + let extent = extent_base.mip_level_size(mip_level, is_3d_texture); + let layer_or_depth_range = if dst_texture.desc.dimension == wgt::TextureDimension::D3 { + // TODO: We assume that we're allowed to do clear operations on + // volume texture slices, this is not properly specified. + 0..extent.depth_or_array_layers + } else { + range.layer_range.clone() + }; + for depth_or_layer in layer_or_depth_range { + let color_attachments_tmp; + let (color_attachments, depth_stencil_attachment) = if is_color { + color_attachments_tmp = [Some(hal::ColorAttachment { + target: hal::Attachment { + view: dst_texture.get_clear_view(mip_level, depth_or_layer), + usage: hal::TextureUses::COLOR_TARGET, + }, + resolve_target: None, + ops: hal::AttachmentOps::STORE, + clear_value: wgt::Color::TRANSPARENT, + })]; + (&color_attachments_tmp[..], None) + } else { + ( + &[][..], + Some(hal::DepthStencilAttachment { + target: hal::Attachment { + view: dst_texture.get_clear_view(mip_level, depth_or_layer), + usage: hal::TextureUses::DEPTH_STENCIL_WRITE, + }, + depth_ops: hal::AttachmentOps::STORE, + stencil_ops: hal::AttachmentOps::STORE, + clear_value: (0.0, 0), + }), + ) + }; + unsafe { + encoder.begin_render_pass(&hal::RenderPassDescriptor { + label: Some("(wgpu internal) clear_texture clear pass"), + extent, + sample_count, + color_attachments, + depth_stencil_attachment, + multiview: None, + }); + encoder.end_render_pass(); + } + } + } + Ok(()) +} diff --git a/third_party/rust/wgpu-core/src/command/compute.rs b/third_party/rust/wgpu-core/src/command/compute.rs new file mode 100644 index 0000000000..dec6211a73 --- /dev/null +++ 
b/third_party/rust/wgpu-core/src/command/compute.rs @@ -0,0 +1,975 @@ +use crate::{ + binding_model::{ + BindError, BindGroup, LateMinBufferBindingSizeMismatch, PushConstantUploadError, + }, + command::{ + bind::Binder, + end_pipeline_statistics_query, + memory_init::{fixup_discarded_surfaces, SurfacesInDiscardState}, + BasePass, BasePassRef, BindGroupStateChange, CommandBuffer, CommandEncoderError, + CommandEncoderStatus, MapPassErr, PassErrorScope, QueryUseError, StateChange, + }, + device::{MissingDownlevelFlags, MissingFeatures}, + error::{ErrorFormatter, PrettyError}, + hub::{Global, GlobalIdentityHandlerFactory, HalApi, Storage, Token}, + id, + init_tracker::MemoryInitKind, + pipeline, + resource::{self, Buffer, Texture}, + track::{Tracker, UsageConflict, UsageScope}, + validation::{check_buffer_usage, MissingBufferUsageError}, + Label, +}; + +use hal::CommandEncoder as _; +use thiserror::Error; + +use std::{fmt, mem, str}; + +#[doc(hidden)] +#[derive(Clone, Copy, Debug)] +#[cfg_attr( + any(feature = "serial-pass", feature = "trace"), + derive(serde::Serialize) +)] +#[cfg_attr( + any(feature = "serial-pass", feature = "replay"), + derive(serde::Deserialize) +)] +pub enum ComputeCommand { + SetBindGroup { + index: u8, + num_dynamic_offsets: u8, + bind_group_id: id::BindGroupId, + }, + SetPipeline(id::ComputePipelineId), + + /// Set a range of push constants to values stored in [`BasePass::push_constant_data`]. + SetPushConstant { + /// The byte offset within the push constant storage to write to. This + /// must be a multiple of four. + offset: u32, + + /// The number of bytes to write. This must be a multiple of four. + size_bytes: u32, + + /// Index in [`BasePass::push_constant_data`] of the start of the data + /// to be written. + /// + /// Note: this is not a byte offset like `offset`. Rather, it is the + /// index of the first `u32` element in `push_constant_data` to read. 
+ values_offset: u32, + }, + + Dispatch([u32; 3]), + DispatchIndirect { + buffer_id: id::BufferId, + offset: wgt::BufferAddress, + }, + PushDebugGroup { + color: u32, + len: usize, + }, + PopDebugGroup, + InsertDebugMarker { + color: u32, + len: usize, + }, + WriteTimestamp { + query_set_id: id::QuerySetId, + query_index: u32, + }, + BeginPipelineStatisticsQuery { + query_set_id: id::QuerySetId, + query_index: u32, + }, + EndPipelineStatisticsQuery, +} + +#[cfg_attr(feature = "serial-pass", derive(serde::Deserialize, serde::Serialize))] +pub struct ComputePass { + base: BasePass<ComputeCommand>, + parent_id: id::CommandEncoderId, + + // Resource binding dedupe state. + #[cfg_attr(feature = "serial-pass", serde(skip))] + current_bind_groups: BindGroupStateChange, + #[cfg_attr(feature = "serial-pass", serde(skip))] + current_pipeline: StateChange<id::ComputePipelineId>, +} + +impl ComputePass { + pub fn new(parent_id: id::CommandEncoderId, desc: &ComputePassDescriptor) -> Self { + Self { + base: BasePass::new(&desc.label), + parent_id, + + current_bind_groups: BindGroupStateChange::new(), + current_pipeline: StateChange::new(), + } + } + + pub fn parent_id(&self) -> id::CommandEncoderId { + self.parent_id + } + + #[cfg(feature = "trace")] + pub fn into_command(self) -> crate::device::trace::Command { + crate::device::trace::Command::RunComputePass { base: self.base } + } +} + +impl fmt::Debug for ComputePass { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "ComputePass {{ encoder_id: {:?}, data: {:?} commands and {:?} dynamic offsets }}", + self.parent_id, + self.base.commands.len(), + self.base.dynamic_offsets.len() + ) + } +} + +#[derive(Clone, Debug, Default)] +pub struct ComputePassDescriptor<'a> { + pub label: Label<'a>, +} + +#[derive(Clone, Debug, Error, Eq, PartialEq)] +pub enum DispatchError { + #[error("compute pipeline must be set")] + MissingPipeline, + #[error("the pipeline layout, associated with the current compute 
pipeline, contains a bind group layout at index {index} which is incompatible with the bind group layout associated with the bind group at {index}")] + IncompatibleBindGroup { + index: u32, + //expected: BindGroupLayoutId, + //provided: Option<(BindGroupLayoutId, BindGroupId)>, + }, + #[error( + "each current dispatch group size dimension ({current:?}) must be less or equal to {limit}" + )] + InvalidGroupSize { current: [u32; 3], limit: u32 }, + #[error(transparent)] + BindingSizeTooSmall(#[from] LateMinBufferBindingSizeMismatch), +} + +/// Error encountered when performing a compute pass. +#[derive(Clone, Debug, Error)] +pub enum ComputePassErrorInner { + #[error(transparent)] + Encoder(#[from] CommandEncoderError), + #[error("bind group {0:?} is invalid")] + InvalidBindGroup(id::BindGroupId), + #[error("bind group index {index} is greater than the device's requested `max_bind_group` limit {max}")] + BindGroupIndexOutOfRange { index: u8, max: u32 }, + #[error("compute pipeline {0:?} is invalid")] + InvalidPipeline(id::ComputePipelineId), + #[error("QuerySet {0:?} is invalid")] + InvalidQuerySet(id::QuerySetId), + #[error("indirect buffer {0:?} is invalid or destroyed")] + InvalidIndirectBuffer(id::BufferId), + #[error("indirect buffer uses bytes {offset}..{end_offset} which overruns indirect buffer of size {buffer_size}")] + IndirectBufferOverrun { + offset: u64, + end_offset: u64, + buffer_size: u64, + }, + #[error("buffer {0:?} is invalid or destroyed")] + InvalidBuffer(id::BufferId), + #[error(transparent)] + ResourceUsageConflict(#[from] UsageConflict), + #[error(transparent)] + MissingBufferUsage(#[from] MissingBufferUsageError), + #[error("cannot pop debug group, because number of pushed debug groups is zero")] + InvalidPopDebugGroup, + #[error(transparent)] + Dispatch(#[from] DispatchError), + #[error(transparent)] + Bind(#[from] BindError), + #[error(transparent)] + PushConstants(#[from] PushConstantUploadError), + #[error(transparent)] + QueryUse(#[from] 
QueryUseError), + #[error(transparent)] + MissingFeatures(#[from] MissingFeatures), + #[error(transparent)] + MissingDownlevelFlags(#[from] MissingDownlevelFlags), +} + +impl PrettyError for ComputePassErrorInner { + fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { + fmt.error(self); + match *self { + Self::InvalidBindGroup(id) => { + fmt.bind_group_label(&id); + } + Self::InvalidPipeline(id) => { + fmt.compute_pipeline_label(&id); + } + Self::InvalidIndirectBuffer(id) => { + fmt.buffer_label(&id); + } + _ => {} + }; + } +} + +/// Error encountered when performing a compute pass. +#[derive(Clone, Debug, Error)] +#[error("{scope}")] +pub struct ComputePassError { + pub scope: PassErrorScope, + #[source] + inner: ComputePassErrorInner, +} +impl PrettyError for ComputePassError { + fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { + // This error is wrapper for the inner error, + // but the scope has useful labels + fmt.error(self); + self.scope.fmt_pretty(fmt); + } +} + +impl<T, E> MapPassErr<T, ComputePassError> for Result<T, E> +where + E: Into<ComputePassErrorInner>, +{ + fn map_pass_err(self, scope: PassErrorScope) -> Result<T, ComputePassError> { + self.map_err(|inner| ComputePassError { + scope, + inner: inner.into(), + }) + } +} + +struct State<A: HalApi> { + binder: Binder, + pipeline: Option<id::ComputePipelineId>, + scope: UsageScope<A>, + debug_scope_depth: u32, +} + +impl<A: HalApi> State<A> { + fn is_ready(&self) -> Result<(), DispatchError> { + let bind_mask = self.binder.invalid_mask(); + if bind_mask != 0 { + //let (expected, provided) = self.binder.entries[index as usize].info(); + return Err(DispatchError::IncompatibleBindGroup { + index: bind_mask.trailing_zeros(), + }); + } + if self.pipeline.is_none() { + return Err(DispatchError::MissingPipeline); + } + self.binder.check_late_buffer_bindings()?; + + Ok(()) + } + + // `extra_buffer` is there to represent the indirect buffer that is also + // part of the usage scope. 
+ fn flush_states( + &mut self, + raw_encoder: &mut A::CommandEncoder, + base_trackers: &mut Tracker<A>, + bind_group_guard: &Storage<BindGroup<A>, id::BindGroupId>, + buffer_guard: &Storage<Buffer<A>, id::BufferId>, + texture_guard: &Storage<Texture<A>, id::TextureId>, + indirect_buffer: Option<id::Valid<id::BufferId>>, + ) -> Result<(), UsageConflict> { + for id in self.binder.list_active() { + unsafe { + self.scope + .merge_bind_group(texture_guard, &bind_group_guard[id].used)? + }; + // Note: stateless trackers are not merged: the lifetime reference + // is held to the bind group itself. + } + + for id in self.binder.list_active() { + unsafe { + base_trackers.set_and_remove_from_usage_scope_sparse( + texture_guard, + &mut self.scope, + &bind_group_guard[id].used, + ) + } + } + + // Add the state of the indirect buffer if it hasn't been hit before. + unsafe { + base_trackers + .buffers + .set_and_remove_from_usage_scope_sparse(&mut self.scope.buffers, indirect_buffer); + } + + log::trace!("Encoding dispatch barriers"); + + CommandBuffer::drain_barriers(raw_encoder, base_trackers, buffer_guard, texture_guard); + Ok(()) + } +} + +// Common routines between render/compute + +impl<G: GlobalIdentityHandlerFactory> Global<G> { + pub fn command_encoder_run_compute_pass<A: HalApi>( + &self, + encoder_id: id::CommandEncoderId, + pass: &ComputePass, + ) -> Result<(), ComputePassError> { + self.command_encoder_run_compute_pass_impl::<A>(encoder_id, pass.base.as_ref()) + } + + #[doc(hidden)] + pub fn command_encoder_run_compute_pass_impl<A: HalApi>( + &self, + encoder_id: id::CommandEncoderId, + base: BasePassRef<ComputeCommand>, + ) -> Result<(), ComputePassError> { + profiling::scope!("CommandEncoder::run_compute_pass"); + let init_scope = PassErrorScope::Pass(encoder_id); + + let hub = A::hub(self); + let mut token = Token::root(); + + let (device_guard, mut token) = hub.devices.read(&mut token); + + let (mut cmd_buf_guard, mut token) = hub.command_buffers.write(&mut 
token); + // Spell out the type, to placate rust-analyzer. + // https://github.com/rust-lang/rust-analyzer/issues/12247 + let cmd_buf: &mut CommandBuffer<A> = + CommandBuffer::get_encoder_mut(&mut *cmd_buf_guard, encoder_id) + .map_pass_err(init_scope)?; + // will be reset to true if recording is done without errors + cmd_buf.status = CommandEncoderStatus::Error; + let raw = cmd_buf.encoder.open(); + + let device = &device_guard[cmd_buf.device_id.value]; + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf.commands { + list.push(crate::device::trace::Command::RunComputePass { + base: BasePass::from_ref(base), + }); + } + + let (_, mut token) = hub.render_bundles.read(&mut token); + let (pipeline_layout_guard, mut token) = hub.pipeline_layouts.read(&mut token); + let (bind_group_guard, mut token) = hub.bind_groups.read(&mut token); + let (pipeline_guard, mut token) = hub.compute_pipelines.read(&mut token); + let (query_set_guard, mut token) = hub.query_sets.read(&mut token); + let (buffer_guard, mut token) = hub.buffers.read(&mut token); + let (texture_guard, _) = hub.textures.read(&mut token); + + let mut state = State { + binder: Binder::new(), + pipeline: None, + scope: UsageScope::new(&*buffer_guard, &*texture_guard), + debug_scope_depth: 0, + }; + let mut temp_offsets = Vec::new(); + let mut dynamic_offset_count = 0; + let mut string_offset = 0; + let mut active_query = None; + + cmd_buf.trackers.set_size( + Some(&*buffer_guard), + Some(&*texture_guard), + None, + None, + Some(&*bind_group_guard), + Some(&*pipeline_guard), + None, + None, + Some(&*query_set_guard), + ); + + let hal_desc = hal::ComputePassDescriptor { label: base.label }; + unsafe { + raw.begin_compute_pass(&hal_desc); + } + + // Immediate texture inits required because of prior discards. Need to + // be inserted before texture reads. 
+ let mut pending_discard_init_fixups = SurfacesInDiscardState::new(); + + for command in base.commands { + match *command { + ComputeCommand::SetBindGroup { + index, + num_dynamic_offsets, + bind_group_id, + } => { + let scope = PassErrorScope::SetBindGroup(bind_group_id); + + let max_bind_groups = cmd_buf.limits.max_bind_groups; + if (index as u32) >= max_bind_groups { + return Err(ComputePassErrorInner::BindGroupIndexOutOfRange { + index, + max: max_bind_groups, + }) + .map_pass_err(scope); + } + + temp_offsets.clear(); + temp_offsets.extend_from_slice( + &base.dynamic_offsets[dynamic_offset_count + ..dynamic_offset_count + (num_dynamic_offsets as usize)], + ); + dynamic_offset_count += num_dynamic_offsets as usize; + + let bind_group: &BindGroup<A> = cmd_buf + .trackers + .bind_groups + .add_single(&*bind_group_guard, bind_group_id) + .ok_or(ComputePassErrorInner::InvalidBindGroup(bind_group_id)) + .map_pass_err(scope)?; + bind_group + .validate_dynamic_bindings(index, &temp_offsets, &cmd_buf.limits) + .map_pass_err(scope)?; + + cmd_buf.buffer_memory_init_actions.extend( + bind_group.used_buffer_ranges.iter().filter_map( + |action| match buffer_guard.get(action.id) { + Ok(buffer) => buffer.initialization_status.check_action(action), + Err(_) => None, + }, + ), + ); + + for action in bind_group.used_texture_ranges.iter() { + pending_discard_init_fixups.extend( + cmd_buf + .texture_memory_actions + .register_init_action(action, &texture_guard), + ); + } + + let pipeline_layout_id = state.binder.pipeline_layout_id; + let entries = state.binder.assign_group( + index as usize, + id::Valid(bind_group_id), + bind_group, + &temp_offsets, + ); + if !entries.is_empty() { + let pipeline_layout = + &pipeline_layout_guard[pipeline_layout_id.unwrap()].raw; + for (i, e) in entries.iter().enumerate() { + let raw_bg = &bind_group_guard[e.group_id.as_ref().unwrap().value].raw; + unsafe { + raw.set_bind_group( + pipeline_layout, + index as u32 + i as u32, + raw_bg, + 
&e.dynamic_offsets, + ); + } + } + } + } + ComputeCommand::SetPipeline(pipeline_id) => { + let scope = PassErrorScope::SetPipelineCompute(pipeline_id); + + state.pipeline = Some(pipeline_id); + + let pipeline: &pipeline::ComputePipeline<A> = cmd_buf + .trackers + .compute_pipelines + .add_single(&*pipeline_guard, pipeline_id) + .ok_or(ComputePassErrorInner::InvalidPipeline(pipeline_id)) + .map_pass_err(scope)?; + + unsafe { + raw.set_compute_pipeline(&pipeline.raw); + } + + // Rebind resources + if state.binder.pipeline_layout_id != Some(pipeline.layout_id.value) { + let pipeline_layout = &pipeline_layout_guard[pipeline.layout_id.value]; + + let (start_index, entries) = state.binder.change_pipeline_layout( + &*pipeline_layout_guard, + pipeline.layout_id.value, + &pipeline.late_sized_buffer_groups, + ); + if !entries.is_empty() { + for (i, e) in entries.iter().enumerate() { + let raw_bg = + &bind_group_guard[e.group_id.as_ref().unwrap().value].raw; + unsafe { + raw.set_bind_group( + &pipeline_layout.raw, + start_index as u32 + i as u32, + raw_bg, + &e.dynamic_offsets, + ); + } + } + } + + // Clear push constant ranges + let non_overlapping = super::bind::compute_nonoverlapping_ranges( + &pipeline_layout.push_constant_ranges, + ); + for range in non_overlapping { + let offset = range.range.start; + let size_bytes = range.range.end - offset; + super::push_constant_clear( + offset, + size_bytes, + |clear_offset, clear_data| unsafe { + raw.set_push_constants( + &pipeline_layout.raw, + wgt::ShaderStages::COMPUTE, + clear_offset, + clear_data, + ); + }, + ); + } + } + } + ComputeCommand::SetPushConstant { + offset, + size_bytes, + values_offset, + } => { + let scope = PassErrorScope::SetPushConstant; + + let end_offset_bytes = offset + size_bytes; + let values_end_offset = + (values_offset + size_bytes / wgt::PUSH_CONSTANT_ALIGNMENT) as usize; + let data_slice = + &base.push_constant_data[(values_offset as usize)..values_end_offset]; + + let pipeline_layout_id = state + 
.binder + .pipeline_layout_id + //TODO: don't error here, lazily update the push constants + .ok_or(ComputePassErrorInner::Dispatch( + DispatchError::MissingPipeline, + )) + .map_pass_err(scope)?; + let pipeline_layout = &pipeline_layout_guard[pipeline_layout_id]; + + pipeline_layout + .validate_push_constant_ranges( + wgt::ShaderStages::COMPUTE, + offset, + end_offset_bytes, + ) + .map_pass_err(scope)?; + + unsafe { + raw.set_push_constants( + &pipeline_layout.raw, + wgt::ShaderStages::COMPUTE, + offset, + data_slice, + ); + } + } + ComputeCommand::Dispatch(groups) => { + let scope = PassErrorScope::Dispatch { + indirect: false, + pipeline: state.pipeline, + }; + + fixup_discarded_surfaces( + pending_discard_init_fixups.drain(..), + raw, + &texture_guard, + &mut cmd_buf.trackers.textures, + device, + ); + + state.is_ready().map_pass_err(scope)?; + state + .flush_states( + raw, + &mut cmd_buf.trackers, + &*bind_group_guard, + &*buffer_guard, + &*texture_guard, + None, + ) + .map_pass_err(scope)?; + + let groups_size_limit = cmd_buf.limits.max_compute_workgroups_per_dimension; + + if groups[0] > groups_size_limit + || groups[1] > groups_size_limit + || groups[2] > groups_size_limit + { + return Err(ComputePassErrorInner::Dispatch( + DispatchError::InvalidGroupSize { + current: groups, + limit: groups_size_limit, + }, + )) + .map_pass_err(scope); + } + + unsafe { + raw.dispatch(groups); + } + } + ComputeCommand::DispatchIndirect { buffer_id, offset } => { + let scope = PassErrorScope::Dispatch { + indirect: true, + pipeline: state.pipeline, + }; + + state.is_ready().map_pass_err(scope)?; + + device + .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION) + .map_pass_err(scope)?; + + let indirect_buffer: &Buffer<A> = state + .scope + .buffers + .merge_single(&*buffer_guard, buffer_id, hal::BufferUses::INDIRECT) + .map_pass_err(scope)?; + check_buffer_usage(indirect_buffer.usage, wgt::BufferUsages::INDIRECT) + .map_pass_err(scope)?; + + let end_offset = 
offset + mem::size_of::<wgt::DispatchIndirectArgs>() as u64; + if end_offset > indirect_buffer.size { + return Err(ComputePassErrorInner::IndirectBufferOverrun { + offset, + end_offset, + buffer_size: indirect_buffer.size, + }) + .map_pass_err(scope); + } + + let buf_raw = indirect_buffer + .raw + .as_ref() + .ok_or(ComputePassErrorInner::InvalidIndirectBuffer(buffer_id)) + .map_pass_err(scope)?; + + let stride = 3 * 4; // 3 integers, x/y/z group size + + cmd_buf.buffer_memory_init_actions.extend( + indirect_buffer.initialization_status.create_action( + buffer_id, + offset..(offset + stride), + MemoryInitKind::NeedsInitializedMemory, + ), + ); + + state + .flush_states( + raw, + &mut cmd_buf.trackers, + &*bind_group_guard, + &*buffer_guard, + &*texture_guard, + Some(id::Valid(buffer_id)), + ) + .map_pass_err(scope)?; + unsafe { + raw.dispatch_indirect(buf_raw, offset); + } + } + ComputeCommand::PushDebugGroup { color: _, len } => { + state.debug_scope_depth += 1; + let label = + str::from_utf8(&base.string_data[string_offset..string_offset + len]) + .unwrap(); + string_offset += len; + unsafe { + raw.begin_debug_marker(label); + } + } + ComputeCommand::PopDebugGroup => { + let scope = PassErrorScope::PopDebugGroup; + + if state.debug_scope_depth == 0 { + return Err(ComputePassErrorInner::InvalidPopDebugGroup) + .map_pass_err(scope); + } + state.debug_scope_depth -= 1; + unsafe { + raw.end_debug_marker(); + } + } + ComputeCommand::InsertDebugMarker { color: _, len } => { + let label = + str::from_utf8(&base.string_data[string_offset..string_offset + len]) + .unwrap(); + string_offset += len; + unsafe { raw.insert_debug_marker(label) } + } + ComputeCommand::WriteTimestamp { + query_set_id, + query_index, + } => { + let scope = PassErrorScope::WriteTimestamp; + + device + .require_features(wgt::Features::WRITE_TIMESTAMP_INSIDE_PASSES) + .map_pass_err(scope)?; + + let query_set: &resource::QuerySet<A> = cmd_buf + .trackers + .query_sets + .add_single(&*query_set_guard, 
query_set_id) + .ok_or(ComputePassErrorInner::InvalidQuerySet(query_set_id)) + .map_pass_err(scope)?; + + query_set + .validate_and_write_timestamp(raw, query_set_id, query_index, None) + .map_pass_err(scope)?; + } + ComputeCommand::BeginPipelineStatisticsQuery { + query_set_id, + query_index, + } => { + let scope = PassErrorScope::BeginPipelineStatisticsQuery; + + let query_set: &resource::QuerySet<A> = cmd_buf + .trackers + .query_sets + .add_single(&*query_set_guard, query_set_id) + .ok_or(ComputePassErrorInner::InvalidQuerySet(query_set_id)) + .map_pass_err(scope)?; + + query_set + .validate_and_begin_pipeline_statistics_query( + raw, + query_set_id, + query_index, + None, + &mut active_query, + ) + .map_pass_err(scope)?; + } + ComputeCommand::EndPipelineStatisticsQuery => { + let scope = PassErrorScope::EndPipelineStatisticsQuery; + + end_pipeline_statistics_query(raw, &*query_set_guard, &mut active_query) + .map_pass_err(scope)?; + } + } + } + + unsafe { + raw.end_compute_pass(); + } + cmd_buf.status = CommandEncoderStatus::Recording; + + // There can be entries left in pending_discard_init_fixups if a bind + // group was set, but not used (i.e. no Dispatch occurred) + // + // However, we already altered the discard/init_action state on this + // cmd_buf, so we need to apply the promised changes. + fixup_discarded_surfaces( + pending_discard_init_fixups.into_iter(), + raw, + &texture_guard, + &mut cmd_buf.trackers.textures, + device, + ); + + Ok(()) + } +} + +pub mod compute_ffi { + use super::{ComputeCommand, ComputePass}; + use crate::{id, RawString}; + use std::{convert::TryInto, ffi, slice}; + use wgt::{BufferAddress, DynamicOffset}; + + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given pointer is + /// valid for `offset_length` elements. 
#[no_mangle]
    pub unsafe extern "C" fn wgpu_compute_pass_set_bind_group(
        pass: &mut ComputePass,
        index: u32,
        bind_group_id: id::BindGroupId,
        offsets: *const DynamicOffset,
        offset_length: usize,
    ) {
        // The redundancy check is also handed `pass.base.dynamic_offsets`
        // and the raw `offsets` pointer.
        let redundant = unsafe {
            pass.current_bind_groups.set_and_check_redundant(
                bind_group_id,
                index,
                &mut pass.base.dynamic_offsets,
                offsets,
                offset_length,
            )
        };

        // Nothing to record when the same bind group is already set.
        if redundant {
            return;
        }

        // The `unwrap`s panic if `index` or `offset_length` do not fit the
        // narrower integer types used by the recorded command.
        pass.base.commands.push(ComputeCommand::SetBindGroup {
            index: index.try_into().unwrap(),
            num_dynamic_offsets: offset_length.try_into().unwrap(),
            bind_group_id,
        });
    }

    /// Records a `SetPipeline` command unless `pipeline_id` is already the
    /// pass's current pipeline.
    #[no_mangle]
    pub extern "C" fn wgpu_compute_pass_set_pipeline(
        pass: &mut ComputePass,
        pipeline_id: id::ComputePipelineId,
    ) {
        if pass.current_pipeline.set_and_check_redundant(pipeline_id) {
            return;
        }

        pass.base
            .commands
            .push(ComputeCommand::SetPipeline(pipeline_id));
    }

    /// # Safety
    ///
    /// This function is unsafe as there is no guarantee that the given pointer is
    /// valid for `size_bytes` bytes.
    #[no_mangle]
    pub unsafe extern "C" fn wgpu_compute_pass_set_push_constant(
        pass: &mut ComputePass,
        offset: u32,
        size_bytes: u32,
        data: *const u8,
    ) {
        assert_eq!(
            offset & (wgt::PUSH_CONSTANT_ALIGNMENT - 1),
            0,
            "Push constant offset must be aligned to 4 bytes."
        );
        assert_eq!(
            size_bytes & (wgt::PUSH_CONSTANT_ALIGNMENT - 1),
            0,
            "Push constant size must be aligned to 4 bytes."
        );
        let data_slice = unsafe { slice::from_raw_parts(data, size_bytes as usize) };
        // Index (in `u32` elements, not bytes) at which this call's data
        // starts inside `push_constant_data`.
        let value_offset = pass.base.push_constant_data.len().try_into().expect(
            "Ran out of push constant space. Don't set 4gb of push constants per ComputePass.",
        );

        // Repack the bytes as native-endian `u32` words.
        pass.base.push_constant_data.extend(
            data_slice
                .chunks_exact(wgt::PUSH_CONSTANT_ALIGNMENT as usize)
                .map(|arr| u32::from_ne_bytes([arr[0], arr[1], arr[2], arr[3]])),
        );

        pass.base.commands.push(ComputeCommand::SetPushConstant {
            offset,
            size_bytes,
            values_offset: value_offset,
        });
    }

    /// Records a direct dispatch of `groups_x` × `groups_y` × `groups_z`
    /// workgroups. No validation happens at record time.
    #[no_mangle]
    pub extern "C" fn wgpu_compute_pass_dispatch_workgroups(
        pass: &mut ComputePass,
        groups_x: u32,
        groups_y: u32,
        groups_z: u32,
    ) {
        pass.base
            .commands
            .push(ComputeCommand::Dispatch([groups_x, groups_y, groups_z]));
    }

    /// Records an indirect dispatch whose arguments are read from
    /// `buffer_id` at byte `offset`. No validation happens at record time.
    #[no_mangle]
    pub extern "C" fn wgpu_compute_pass_dispatch_workgroups_indirect(
        pass: &mut ComputePass,
        buffer_id: id::BufferId,
        offset: BufferAddress,
    ) {
        pass.base
            .commands
            .push(ComputeCommand::DispatchIndirect { buffer_id, offset });
    }

    /// # Safety
    ///
    /// This function is unsafe as there is no guarantee that the given `label`
    /// is a valid null-terminated string.
    #[no_mangle]
    pub unsafe extern "C" fn wgpu_compute_pass_push_debug_group(
        pass: &mut ComputePass,
        label: RawString,
        color: u32,
    ) {
        // The label bytes (without the trailing NUL) are appended to the
        // shared string buffer; the command only stores their length.
        let bytes = unsafe { ffi::CStr::from_ptr(label) }.to_bytes();
        pass.base.string_data.extend_from_slice(bytes);

        pass.base.commands.push(ComputeCommand::PushDebugGroup {
            color,
            len: bytes.len(),
        });
    }

    /// Records a `PopDebugGroup` command.
    #[no_mangle]
    pub extern "C" fn wgpu_compute_pass_pop_debug_group(pass: &mut ComputePass) {
        pass.base.commands.push(ComputeCommand::PopDebugGroup);
    }

    /// # Safety
    ///
    /// This function is unsafe as there is no guarantee that the given `label`
    /// is a valid null-terminated string.
#[no_mangle]
    pub unsafe extern "C" fn wgpu_compute_pass_insert_debug_marker(
        pass: &mut ComputePass,
        label: RawString,
        color: u32,
    ) {
        // The label bytes (without the trailing NUL) are appended to the
        // shared string buffer; the command only stores their length.
        let bytes = unsafe { ffi::CStr::from_ptr(label) }.to_bytes();
        pass.base.string_data.extend_from_slice(bytes);

        pass.base.commands.push(ComputeCommand::InsertDebugMarker {
            color,
            len: bytes.len(),
        });
    }

    /// Records a `WriteTimestamp` command for slot `query_index` of
    /// `query_set_id`.
    #[no_mangle]
    pub extern "C" fn wgpu_compute_pass_write_timestamp(
        pass: &mut ComputePass,
        query_set_id: id::QuerySetId,
        query_index: u32,
    ) {
        pass.base.commands.push(ComputeCommand::WriteTimestamp {
            query_set_id,
            query_index,
        });
    }

    /// Records a `BeginPipelineStatisticsQuery` command for slot
    /// `query_index` of `query_set_id`.
    #[no_mangle]
    pub extern "C" fn wgpu_compute_pass_begin_pipeline_statistics_query(
        pass: &mut ComputePass,
        query_set_id: id::QuerySetId,
        query_index: u32,
    ) {
        pass.base
            .commands
            .push(ComputeCommand::BeginPipelineStatisticsQuery {
                query_set_id,
                query_index,
            });
    }

    /// Records an `EndPipelineStatisticsQuery` command.
    #[no_mangle]
    pub extern "C" fn wgpu_compute_pass_end_pipeline_statistics_query(pass: &mut ComputePass) {
        pass.base
            .commands
            .push(ComputeCommand::EndPipelineStatisticsQuery);
    }
}
diff --git a/third_party/rust/wgpu-core/src/command/draw.rs b/third_party/rust/wgpu-core/src/command/draw.rs
new file mode 100644
index 0000000000..24b0be9d53
--- /dev/null
+++ b/third_party/rust/wgpu-core/src/command/draw.rs
@@ -0,0 +1,250 @@
/*! Draw structures - shared between render passes and bundles.
!*/

use crate::{
    binding_model::{LateMinBufferBindingSizeMismatch, PushConstantUploadError},
    error::ErrorFormatter,
    id,
    track::UsageConflict,
    validation::{MissingBufferUsageError, MissingTextureUsageError},
};
use wgt::{BufferAddress, BufferSize, Color};

use std::num::NonZeroU32;
use thiserror::Error;

/// Error validating a draw call.
#[derive(Clone, Debug, Error, Eq, PartialEq)]
pub enum DrawError {
    #[error("blend constant needs to be set")]
    MissingBlendConstant,
    #[error("render pipeline must be set")]
    MissingPipeline,
    #[error("vertex buffer {index} must be set")]
    MissingVertexBuffer { index: u32 },
    #[error("index buffer must be set")]
    MissingIndexBuffer,
    #[error("the pipeline layout, associated with the current render pipeline, contains a bind group layout at index {index} which is incompatible with the bind group layout associated with the bind group at {index}")]
    IncompatibleBindGroup {
        /// Bind group slot at which the layouts disagree.
        index: u32,
        //expected: BindGroupLayoutId,
        //provided: Option<(BindGroupLayoutId, BindGroupId)>,
    },
    #[error("vertex {last_vertex} extends beyond limit {vertex_limit} imposed by the buffer in slot {slot}. Did you bind the correct `Vertex` step-rate vertex buffer?")]
    VertexBeyondLimit {
        last_vertex: u32,
        vertex_limit: u32,
        /// Vertex buffer slot whose bound buffer imposes `vertex_limit`.
        slot: u32,
    },
    #[error("instance {last_instance} extends beyond limit {instance_limit} imposed by the buffer in slot {slot}. Did you bind the correct `Instance` step-rate vertex buffer?")]
    InstanceBeyondLimit {
        last_instance: u32,
        instance_limit: u32,
        /// Vertex buffer slot whose bound buffer imposes `instance_limit`.
        slot: u32,
    },
    #[error("index {last_index} extends beyond limit {index_limit}. Did you bind the correct index buffer?")]
    IndexBeyondLimit { last_index: u32, index_limit: u32 },
    #[error(
        "pipeline index format ({pipeline:?}) and buffer index format ({buffer:?}) do not match"
    )]
    UnmatchedIndexFormats {
        pipeline: wgt::IndexFormat,
        buffer: wgt::IndexFormat,
    },
    /// Forwards the message of the wrapped late-binding size mismatch.
    #[error(transparent)]
    BindingSizeTooSmall(#[from] LateMinBufferBindingSizeMismatch),
}

/// Error encountered when encoding a render command.
/// This is the shared error set between render bundles and passes.
#[derive(Clone, Debug, Error)]
pub enum RenderCommandError {
    #[error("bind group {0:?} is invalid")]
    InvalidBindGroup(id::BindGroupId),
    #[error("render bundle {0:?} is invalid")]
    InvalidRenderBundle(id::RenderBundleId),
    #[error("bind group index {index} is greater than the device's requested `max_bind_group` limit {max}")]
    BindGroupIndexOutOfRange { index: u8, max: u32 },
    #[error("dynamic buffer offset {0} does not respect device's requested `{1}` limit {2}")]
    UnalignedBufferOffset(u64, &'static str, u32),
    #[error("number of buffer offsets ({actual}) does not match the number of dynamic bindings ({expected})")]
    InvalidDynamicOffsetCount { actual: usize, expected: usize },
    #[error("render pipeline {0:?} is invalid")]
    InvalidPipeline(id::RenderPipelineId),
    #[error("QuerySet {0:?} is invalid")]
    InvalidQuerySet(id::QuerySetId),
    #[error("Render pipeline targets are incompatible with render pass")]
    IncompatiblePipelineTargets(#[from] crate::device::RenderPassCompatibilityError),
    #[error("pipeline writes to depth/stencil, while the pass has read-only depth/stencil")]
    IncompatiblePipelineRods,
    #[error(transparent)]
    UsageConflict(#[from] UsageConflict),
    #[error("buffer {0:?} is destroyed")]
    DestroyedBuffer(id::BufferId),
    #[error(transparent)]
    MissingBufferUsage(#[from] MissingBufferUsageError),
    #[error(transparent)]
    MissingTextureUsage(#[from] MissingTextureUsageError),
    #[error(transparent)]
    PushConstants(#[from] PushConstantUploadError),
    #[error("Viewport width {0} and/or height {1} are less than or equal to 0")]
    InvalidViewportDimension(f32, f32),
    #[error("Viewport minDepth {0} and/or maxDepth {1} are not in [0, 1]")]
    InvalidViewportDepth(f32, f32),
    #[error("Scissor {0:?} is not contained in the render target {1:?}")]
    InvalidScissorRect(Rect<u32>, wgt::Extent3d),
    #[error("Support for {0} is not implemented yet")]
    Unimplemented(&'static str),
}
impl crate::error::PrettyError for RenderCommandError {
    fn fmt_pretty(&self, fmt: &mut ErrorFormatter) {
        // Print the error itself, then the label of the bind group, render
        // pipeline, texture or buffer whose id the variant carries. All other
        // variants print nothing extra.
        fmt.error(self);
        match *self {
            Self::InvalidBindGroup(id) => {
                fmt.bind_group_label(&id);
            }
            Self::InvalidPipeline(id) => {
                fmt.render_pipeline_label(&id);
            }
            Self::UsageConflict(UsageConflict::TextureInvalid { id }) => {
                fmt.texture_label(&id);
            }
            Self::UsageConflict(UsageConflict::BufferInvalid { id })
            | Self::DestroyedBuffer(id) => {
                fmt.buffer_label(&id);
            }
            _ => {}
        };
    }
}

/// Rectangle described by an origin (`x`, `y`) and an extent (`w`, `h`).
#[derive(Clone, Copy, Debug, Default)]
#[cfg_attr(
    any(feature = "serial-pass", feature = "trace"),
    derive(serde::Serialize)
)]
#[cfg_attr(
    any(feature = "serial-pass", feature = "replay"),
    derive(serde::Deserialize)
)]
pub struct Rect<T> {
    pub x: T,
    pub y: T,
    pub w: T,
    pub h: T,
}

#[doc(hidden)]
#[derive(Clone, Copy, Debug)]
#[cfg_attr(
    any(feature = "serial-pass", feature = "trace"),
    derive(serde::Serialize)
)]
#[cfg_attr(
    any(feature = "serial-pass", feature = "replay"),
    derive(serde::Deserialize)
)]
pub enum RenderCommand {
    SetBindGroup {
        index: u8,
        num_dynamic_offsets: u8,
        bind_group_id: id::BindGroupId,
    },
    SetPipeline(id::RenderPipelineId),
    SetIndexBuffer {
        buffer_id: id::BufferId,
        index_format: wgt::IndexFormat,
        offset: BufferAddress,
        size: Option<BufferSize>,
    },
    SetVertexBuffer {
        slot: u32,
        buffer_id: id::BufferId,
        offset: BufferAddress,
        size: Option<BufferSize>,
    },
    SetBlendConstant(Color),
    SetStencilReference(u32),
    SetViewport {
        rect: Rect<f32>,
        //TODO: use half-float to reduce the size?
        depth_min: f32,
        depth_max: f32,
    },
    SetScissor(Rect<u32>),

    /// Set a range of push constants to values stored in [`BasePass::push_constant_data`].
    ///
    /// See [`wgpu::RenderPass::set_push_constants`] for a detailed explanation
    /// of the restrictions these commands must satisfy.
    SetPushConstant {
        /// Which stages we are setting push constant values for.
        stages: wgt::ShaderStages,

        /// The byte offset within the push constant storage to write to. This
        /// must be a multiple of four.
        offset: u32,

        /// The number of bytes to write. This must be a multiple of four.
        size_bytes: u32,

        /// Index in [`BasePass::push_constant_data`] of the start of the data
        /// to be written.
        ///
        /// Note: this is not a byte offset like `offset`. Rather, it is the
        /// index of the first `u32` element in `push_constant_data` to read.
        ///
        /// `None` means zeros should be written to the destination range, and
        /// there is no corresponding data in `push_constant_data`. This is used
        /// by render bundles, which explicitly clear out any state that
        /// post-bundle code might see.
        values_offset: Option<u32>,
    },
    Draw {
        vertex_count: u32,
        instance_count: u32,
        first_vertex: u32,
        first_instance: u32,
    },
    DrawIndexed {
        index_count: u32,
        instance_count: u32,
        first_index: u32,
        base_vertex: i32,
        first_instance: u32,
    },
    MultiDrawIndirect {
        buffer_id: id::BufferId,
        offset: BufferAddress,
        /// Count of `None` represents a non-multi call.
        count: Option<NonZeroU32>,
        indexed: bool,
    },
    MultiDrawIndirectCount {
        buffer_id: id::BufferId,
        offset: BufferAddress,
        count_buffer_id: id::BufferId,
        count_buffer_offset: BufferAddress,
        max_count: u32,
        indexed: bool,
    },
    PushDebugGroup {
        color: u32,
        len: usize,
    },
    PopDebugGroup,
    InsertDebugMarker {
        color: u32,
        len: usize,
    },
    WriteTimestamp {
        query_set_id: id::QuerySetId,
        query_index: u32,
    },
    BeginPipelineStatisticsQuery {
        query_set_id: id::QuerySetId,
        query_index: u32,
    },
    EndPipelineStatisticsQuery,
    ExecuteBundle(id::RenderBundleId),
}
diff --git a/third_party/rust/wgpu-core/src/command/memory_init.rs b/third_party/rust/wgpu-core/src/command/memory_init.rs
new file mode 100644
index 0000000000..52735fec51
--- /dev/null
+++ b/third_party/rust/wgpu-core/src/command/memory_init.rs
@@ -0,0 +1,337 @@
use std::{collections::hash_map::Entry, ops::Range, vec::Drain};

use hal::CommandEncoder;

use crate::{
    device::Device,
    hub::{HalApi, Storage},
    id::{self, TextureId},
    init_tracker::*,
    resource::{Buffer, Texture},
    track::{TextureTracker, Tracker},
    FastHashMap,
};

use super::{clear::clear_texture, BakedCommands, DestroyedBufferError, DestroyedTextureError};

/// Surface that was discarded by `StoreOp::Discard` of a preceding renderpass.
/// Any read access to this surface needs to be preceded by a texture initialization.
#[derive(Clone)]
pub(crate) struct TextureSurfaceDiscard {
    pub texture: TextureId,
    pub mip_level: u32,
    pub layer: u32,
}

pub(crate) type SurfacesInDiscardState = Vec<TextureSurfaceDiscard>;

#[derive(Default)]
pub(crate) struct CommandBufferTextureMemoryActions {
    /// The tracker actions that we need to be executed before the command
    /// buffer is executed.
    init_actions: Vec<TextureInitTrackerAction>,
    /// All the discards that haven't been followed by init again within the
    /// command buffer i.e. everything in this list resets the texture init
    /// state *after* the command buffer execution
    discards: Vec<TextureSurfaceDiscard>,
}

impl CommandBufferTextureMemoryActions {
    // Removes and yields every init action recorded so far.
    pub(crate) fn drain_init_actions(&mut self) -> Drain<TextureInitTrackerAction> {
        self.init_actions.drain(..)
    }

    // Records that a surface was discarded by this command buffer.
    pub(crate) fn discard(&mut self, discard: TextureSurfaceDiscard) {
        self.discards.push(discard);
    }

    // Registers a TextureInitTrackerAction.
    // Returns previously discarded surfaces that need to be initialized *immediately* now.
    // Only returns a non-empty list if action is MemoryInitKind::NeedsInitializedMemory.
    #[must_use]
    pub(crate) fn register_init_action<A: hal::Api>(
        &mut self,
        action: &TextureInitTrackerAction,
        texture_guard: &Storage<Texture<A>, TextureId>,
    ) -> SurfacesInDiscardState {
        let mut immediately_necessary_clears = SurfacesInDiscardState::new();

        // Note that within a command buffer we may stack arbitrary memory init
        // actions on the same texture. Since we react to them in sequence, they
        // are going to be dropped again at queue submit
        //
        // We don't need to add MemoryInitKind::NeedsInitializedMemory to
        // init_actions if a surface is part of the discard list. But that would
        // mean splitting up the action which is more than we'd win here.
        self.init_actions
            .extend(match texture_guard.get(action.id) {
                Ok(texture) => texture.initialization_status.check_action(action),
                Err(_) => return immediately_necessary_clears, // texture no longer exists
            });

        // We expect very few discarded surfaces at any point in time which is
        // why a simple linear search is likely best. (i.e. most of the time
        // self.discards is empty!)
        let init_actions = &mut self.init_actions;
        // Every discarded surface covered by `action` leaves the discard list,
        // whatever the action kind; only `NeedsInitializedMemory` additionally
        // schedules an immediate clear.
        self.discards.retain(|discarded_surface| {
            if discarded_surface.texture == action.id
                && action.range.layer_range.contains(&discarded_surface.layer)
                && action
                    .range
                    .mip_range
                    .contains(&discarded_surface.mip_level)
            {
                if let MemoryInitKind::NeedsInitializedMemory = action.kind {
                    immediately_necessary_clears.push(discarded_surface.clone());

                    // Mark surface as implicitly initialized (this is relevant
                    // because it might have been uninitialized prior to
                    // discarding)
                    init_actions.push(TextureInitTrackerAction {
                        id: discarded_surface.texture,
                        range: TextureInitRange {
                            mip_range: discarded_surface.mip_level
                                ..(discarded_surface.mip_level + 1),
                            layer_range: discarded_surface.layer..(discarded_surface.layer + 1),
                        },
                        kind: MemoryInitKind::ImplicitlyInitialized,
                    });
                }
                false
            } else {
                true
            }
        });

        immediately_necessary_clears
    }

    // Shortcut for register_init_action when it is known that the action is an
    // implicit init, not requiring any immediate resource init.
    pub(crate) fn register_implicit_init<A: hal::Api>(
        &mut self,
        id: id::Valid<TextureId>,
        range: TextureInitRange,
        texture_guard: &Storage<Texture<A>, TextureId>,
    ) {
        let must_be_empty = self.register_init_action(
            &TextureInitTrackerAction {
                id: id.0,
                range,
                kind: MemoryInitKind::ImplicitlyInitialized,
            },
            texture_guard,
        );
        // An implicit init never requests immediate clears (see
        // `register_init_action`), so a non-empty result is a logic error.
        assert!(must_be_empty.is_empty());
    }
}

// Utility function that takes discarded surfaces from (several calls to)
// register_init_action and initializes them on the spot.
//
// Takes care of barriers as well!
pub(crate) fn fixup_discarded_surfaces<
    A: HalApi,
    InitIter: Iterator<Item = TextureSurfaceDiscard>,
>(
    inits: InitIter,
    encoder: &mut A::CommandEncoder,
    texture_guard: &Storage<Texture<A>, TextureId>,
    texture_tracker: &mut TextureTracker<A>,
    device: &Device<A>,
) {
    // Each discarded surface is cleared as a single mip level / single layer
    // range. A failing clear panics via `unwrap`.
    for init in inits {
        clear_texture(
            texture_guard,
            id::Valid(init.texture),
            TextureInitRange {
                mip_range: init.mip_level..(init.mip_level + 1),
                layer_range: init.layer..(init.layer + 1),
            },
            encoder,
            texture_tracker,
            &device.alignments,
            &device.zero_buffer,
        )
        .unwrap();
    }
}

impl<A: HalApi> BakedCommands<A> {
    // inserts all buffer initializations that are going to be needed for
    // executing the commands and updates resource init states accordingly
    pub(crate) fn initialize_buffer_memory(
        &mut self,
        device_tracker: &mut Tracker<A>,
        buffer_guard: &mut Storage<Buffer<A>, id::BufferId>,
    ) -> Result<(), DestroyedBufferError> {
        // Gather init ranges for each buffer so we can collapse them.
        // It is not possible to do this at an earlier point since previously
        // executed command buffer change the resource init state.
        let mut uninitialized_ranges_per_buffer = FastHashMap::default();
        for buffer_use in self.buffer_memory_init_actions.drain(..) {
            let buffer = buffer_guard
                .get_mut(buffer_use.id)
                .map_err(|_| DestroyedBufferError(buffer_use.id))?;

            // align the end to 4
            let end_remainder = buffer_use.range.end % wgt::COPY_BUFFER_ALIGNMENT;
            let end = if end_remainder == 0 {
                buffer_use.range.end
            } else {
                buffer_use.range.end + wgt::COPY_BUFFER_ALIGNMENT - end_remainder
            };
            let uninitialized_ranges = buffer
                .initialization_status
                .drain(buffer_use.range.start..end);

            // Only `NeedsInitializedMemory` collects the drained ranges for
            // clearing below; for implicit inits they are dropped unused.
            match buffer_use.kind {
                MemoryInitKind::ImplicitlyInitialized => {}
                MemoryInitKind::NeedsInitializedMemory => {
                    match uninitialized_ranges_per_buffer.entry(buffer_use.id) {
                        Entry::Vacant(e) => {
                            e.insert(
                                uninitialized_ranges.collect::<Vec<Range<wgt::BufferAddress>>>(),
                            );
                        }
                        Entry::Occupied(mut e) => {
                            e.get_mut().extend(uninitialized_ranges);
                        }
                    }
                }
            }
        }

        for (buffer_id, mut ranges) in uninitialized_ranges_per_buffer {
            // Collapse touching ranges.
            ranges.sort_by_key(|r| r.start);
            // Walk from the back so that `swap_remove(i)` only ever moves an
            // element that has already been visited.
            for i in (1..ranges.len()).rev() {
                // The memory init tracker made sure of this!
                assert!(ranges[i - 1].end <= ranges[i].start);
                if ranges[i].start == ranges[i - 1].end {
                    ranges[i - 1].end = ranges[i].end;
                    ranges.swap_remove(i); // Ordering not important at this point
                }
            }

            // Don't do use_replace since the buffer may already no longer have
            // a ref_count.
            //
            // However, we *know* that it is currently in use, so the tracker
            // must already know about it.
            let transition = device_tracker
                .buffers
                .set_single(buffer_guard, buffer_id, hal::BufferUses::COPY_DST)
                .unwrap()
                .1;

            let buffer = buffer_guard
                .get_mut(buffer_id)
                .map_err(|_| DestroyedBufferError(buffer_id))?;
            let raw_buf = buffer.raw.as_ref().ok_or(DestroyedBufferError(buffer_id))?;

            unsafe {
                self.encoder.transition_buffers(
                    transition
                        .map(|pending| pending.into_hal(buffer))
                        .into_iter(),
                );
            }

            for range in ranges.iter() {
                assert!(
                    range.start % wgt::COPY_BUFFER_ALIGNMENT == 0,
                    "Buffer {:?} has an uninitialized range with a start \
                         not aligned to 4 (start was {})",
                    raw_buf,
                    range.start
                );
                assert!(
                    range.end % wgt::COPY_BUFFER_ALIGNMENT == 0,
                    "Buffer {:?} has an uninitialized range with an end \
                         not aligned to 4 (end was {})",
                    raw_buf,
                    range.end
                );

                unsafe {
                    self.encoder.clear_buffer(raw_buf, range.clone());
                }
            }
        }
        Ok(())
    }

    // inserts all texture initializations that are going to be needed for
    // executing the commands and updates resource init states accordingly any
    // textures that are left discarded by this command buffer will be marked as
    // uninitialized
    pub(crate) fn initialize_texture_memory(
        &mut self,
        device_tracker: &mut Tracker<A>,
        texture_guard: &mut Storage<Texture<A>, TextureId>,
        device: &Device<A>,
    ) -> Result<(), DestroyedTextureError> {
        // Scratch list, filled and drained again for every init action.
        let mut ranges: Vec<TextureInitRange> = Vec::new();
        for texture_use in self.texture_memory_actions.drain_init_actions() {
            let texture = texture_guard
                .get_mut(texture_use.id)
                .map_err(|_| DestroyedTextureError(texture_use.id))?;

            let use_range = texture_use.range;
            // Per-mip init trackers restricted to the mip range of this action.
            let affected_mip_trackers = texture
                .initialization_status
                .mips
                .iter_mut()
                .enumerate()
                .skip(use_range.mip_range.start as usize)
                .take((use_range.mip_range.end - use_range.mip_range.start) as usize);

            match texture_use.kind {
                MemoryInitKind::ImplicitlyInitialized => {
                    for (_, mip_tracker) in affected_mip_trackers {
                        mip_tracker.drain(use_range.layer_range.clone());
                    }
                }
                MemoryInitKind::NeedsInitializedMemory => {
                    for (mip_level, mip_tracker) in affected_mip_trackers {
                        for layer_range in mip_tracker.drain(use_range.layer_range.clone()) {
                            ranges.push(TextureInitRange {
                                mip_range: (mip_level as u32)..(mip_level as u32 + 1),
                                layer_range,
                            });
                        }
                    }
                }
            }

            // TODO: Could we attempt some range collapsing here?
            for range in ranges.drain(..) {
                clear_texture(
                    texture_guard,
                    id::Valid(texture_use.id),
                    range,
                    &mut self.encoder,
                    &mut device_tracker.textures,
                    &device.alignments,
                    &device.zero_buffer,
                )
                .unwrap();
            }
        }

        // Now that all buffers/textures have the proper init state for before
        // cmdbuf start, we discard init states for textures it left discarded
        // after its execution.
        for surface_discard in self.texture_memory_actions.discards.iter() {
            let texture = texture_guard
                .get_mut(surface_discard.texture)
                .map_err(|_| DestroyedTextureError(surface_discard.texture))?;
            texture
                .initialization_status
                .discard(surface_discard.mip_level, surface_discard.layer);
        }

        Ok(())
    }
}
diff --git a/third_party/rust/wgpu-core/src/command/mod.rs b/third_party/rust/wgpu-core/src/command/mod.rs
new file mode 100644
index 0000000000..f6dc086350
--- /dev/null
+++ b/third_party/rust/wgpu-core/src/command/mod.rs
@@ -0,0 +1,631 @@
mod bind;
mod bundle;
mod clear;
mod compute;
mod draw;
mod memory_init;
mod query;
mod render;
mod transfer;

use std::slice;

pub(crate) use self::clear::clear_texture;
pub use self::{
    bundle::*, clear::ClearError, compute::*, draw::*, query::*, render::*, transfer::*,
};

use self::memory_init::CommandBufferTextureMemoryActions;

use crate::error::{ErrorFormatter, PrettyError};
use crate::init_tracker::BufferInitTrackerAction;
use crate::track::{Tracker, UsageScope};
use crate::{
    hub::{Global, GlobalIdentityHandlerFactory, HalApi, Storage, Token},
    id,
    resource::{Buffer, Texture},
    Label, Stored,
};

use hal::CommandEncoder as _;
use thiserror::Error;

#[cfg(feature = "trace")]
use crate::device::trace::Command as TraceCommand;

// 64 zeroed `u32` words.
// NOTE(review): presumably the source data for clearing push constants - confirm at the use site.
const PUSH_CONSTANT_CLEAR_ARRAY: &[u32] = &[0_u32; 64];

#[derive(Debug)]
enum CommandEncoderStatus {
    Recording,
    Finished,
    Error,
}

// Wraps a HAL command encoder together with the command buffers it has
// already finished and a flag telling whether an encoding is in progress.
struct CommandEncoder<A: hal::Api> {
    raw: A::CommandEncoder,
    list: Vec<A::CommandBuffer>,
    is_open: bool,
    label: Option<String>,
}

//TODO: handle errors better
impl<A: hal::Api> CommandEncoder<A> {
    // Ends the current encoding, if one is open, and stores the resulting
    // command buffer in `list`. Panics if the HAL fails to end the encoding.
    fn close(&mut self) {
        if self.is_open {
            self.is_open = false;
            let cmd_buf = unsafe { self.raw.end_encoding().unwrap() };
            self.list.push(cmd_buf);
        }
    }

    // Throws away the current encoding, if one is open.
    fn discard(&mut self) {
        if self.is_open {
            self.is_open = false;
            unsafe { self.raw.discard_encoding() };
        }
    }

    // Returns the raw encoder, beginning a new encoding (labelled with
    // `self.label`) first when none is open.
    fn open(&mut self) -> &mut A::CommandEncoder {
        if !self.is_open {
            self.is_open = true;
            let label = self.label.as_deref();
            unsafe { self.raw.begin_encoding(label).unwrap() };
        }
        &mut self.raw
    }

    // Begins a new encoding with the given label. Unlike `open`, this does
    // not check `is_open` first.
    fn open_pass(&mut self, label: Option<&str>) {
        self.is_open = true;
        unsafe { self.raw.begin_encoding(label).unwrap() };
    }
}

pub struct BakedCommands<A: HalApi> {
    pub(crate) encoder: A::CommandEncoder,
    pub(crate) list: Vec<A::CommandBuffer>,
    pub(crate) trackers: Tracker<A>,
    buffer_memory_init_actions: Vec<BufferInitTrackerAction>,
    texture_memory_actions: CommandBufferTextureMemoryActions,
}

pub(crate) struct DestroyedBufferError(pub id::BufferId);
pub(crate) struct DestroyedTextureError(pub id::TextureId);

pub struct CommandBuffer<A: HalApi> {
    encoder: CommandEncoder<A>,
    status: CommandEncoderStatus,
    pub(crate) device_id: Stored<id::DeviceId>,
    pub(crate) trackers: Tracker<A>,
    buffer_memory_init_actions: Vec<BufferInitTrackerAction>,
    texture_memory_actions: CommandBufferTextureMemoryActions,
    limits: wgt::Limits,
    support_clear_texture: bool,
    #[cfg(feature = "trace")]
    pub(crate) commands: Option<Vec<TraceCommand>>,
+} + +impl<A: HalApi> CommandBuffer<A> { + pub(crate) fn new( + encoder: A::CommandEncoder, + device_id: Stored<id::DeviceId>, + limits: wgt::Limits, + _downlevel: wgt::DownlevelCapabilities, + features: wgt::Features, + #[cfg(feature = "trace")] enable_tracing: bool, + label: &Label, + ) -> Self { + CommandBuffer { + encoder: CommandEncoder { + raw: encoder, + is_open: false, + list: Vec::new(), + label: crate::LabelHelpers::borrow_option(label).map(|s| s.to_string()), + }, + status: CommandEncoderStatus::Recording, + device_id, + trackers: Tracker::new(), + buffer_memory_init_actions: Default::default(), + texture_memory_actions: Default::default(), + limits, + support_clear_texture: features.contains(wgt::Features::CLEAR_TEXTURE), + #[cfg(feature = "trace")] + commands: if enable_tracing { + Some(Vec::new()) + } else { + None + }, + } + } + + pub(crate) fn insert_barriers_from_tracker( + raw: &mut A::CommandEncoder, + base: &mut Tracker<A>, + head: &Tracker<A>, + buffer_guard: &Storage<Buffer<A>, id::BufferId>, + texture_guard: &Storage<Texture<A>, id::TextureId>, + ) { + profiling::scope!("insert_barriers"); + + base.buffers.set_from_tracker(&head.buffers); + base.textures + .set_from_tracker(texture_guard, &head.textures); + + Self::drain_barriers(raw, base, buffer_guard, texture_guard); + } + + pub(crate) fn insert_barriers_from_scope( + raw: &mut A::CommandEncoder, + base: &mut Tracker<A>, + head: &UsageScope<A>, + buffer_guard: &Storage<Buffer<A>, id::BufferId>, + texture_guard: &Storage<Texture<A>, id::TextureId>, + ) { + profiling::scope!("insert_barriers"); + + base.buffers.set_from_usage_scope(&head.buffers); + base.textures + .set_from_usage_scope(texture_guard, &head.textures); + + Self::drain_barriers(raw, base, buffer_guard, texture_guard); + } + + pub(crate) fn drain_barriers( + raw: &mut A::CommandEncoder, + base: &mut Tracker<A>, + buffer_guard: &Storage<Buffer<A>, id::BufferId>, + texture_guard: &Storage<Texture<A>, id::TextureId>, + ) { + 
profiling::scope!("drain_barriers"); + + let buffer_barriers = base.buffers.drain().map(|pending| { + let buf = unsafe { &buffer_guard.get_unchecked(pending.id) }; + pending.into_hal(buf) + }); + let texture_barriers = base.textures.drain().map(|pending| { + let tex = unsafe { texture_guard.get_unchecked(pending.id) }; + pending.into_hal(tex) + }); + + unsafe { + raw.transition_buffers(buffer_barriers); + raw.transition_textures(texture_barriers); + } + } +} + +impl<A: HalApi> CommandBuffer<A> { + fn get_encoder_mut( + storage: &mut Storage<Self, id::CommandEncoderId>, + id: id::CommandEncoderId, + ) -> Result<&mut Self, CommandEncoderError> { + match storage.get_mut(id) { + Ok(cmd_buf) => match cmd_buf.status { + CommandEncoderStatus::Recording => Ok(cmd_buf), + CommandEncoderStatus::Finished => Err(CommandEncoderError::NotRecording), + CommandEncoderStatus::Error => Err(CommandEncoderError::Invalid), + }, + Err(_) => Err(CommandEncoderError::Invalid), + } + } + + pub fn is_finished(&self) -> bool { + match self.status { + CommandEncoderStatus::Finished => true, + _ => false, + } + } + + pub(crate) fn into_baked(self) -> BakedCommands<A> { + BakedCommands { + encoder: self.encoder.raw, + list: self.encoder.list, + trackers: self.trackers, + buffer_memory_init_actions: self.buffer_memory_init_actions, + texture_memory_actions: self.texture_memory_actions, + } + } +} + +impl<A: HalApi> crate::hub::Resource for CommandBuffer<A> { + const TYPE: &'static str = "CommandBuffer"; + + fn life_guard(&self) -> &crate::LifeGuard { + unreachable!() + } + + fn label(&self) -> &str { + self.encoder.label.as_ref().map_or("", |s| s.as_str()) + } +} + +#[derive(Copy, Clone, Debug)] +pub struct BasePassRef<'a, C> { + pub label: Option<&'a str>, + pub commands: &'a [C], + pub dynamic_offsets: &'a [wgt::DynamicOffset], + pub string_data: &'a [u8], + pub push_constant_data: &'a [u32], +} + +/// A stream of commands for a render pass or compute pass. 
+/// +/// This also contains side tables referred to by certain commands, +/// like dynamic offsets for [`SetBindGroup`] or string data for +/// [`InsertDebugMarker`]. +/// +/// Render passes use `BasePass<RenderCommand>`, whereas compute +/// passes use `BasePass<ComputeCommand>`. +/// +/// [`SetBindGroup`]: RenderCommand::SetBindGroup +/// [`InsertDebugMarker`]: RenderCommand::InsertDebugMarker +#[doc(hidden)] +#[derive(Debug)] +#[cfg_attr( + any(feature = "serial-pass", feature = "trace"), + derive(serde::Serialize) +)] +#[cfg_attr( + any(feature = "serial-pass", feature = "replay"), + derive(serde::Deserialize) +)] +pub struct BasePass<C> { + pub label: Option<String>, + + /// The stream of commands. + pub commands: Vec<C>, + + /// Dynamic offsets consumed by [`SetBindGroup`] commands in `commands`. + /// + /// Each successive `SetBindGroup` consumes the next + /// [`num_dynamic_offsets`] values from this list. + pub dynamic_offsets: Vec<wgt::DynamicOffset>, + + /// Strings used by debug instructions. + /// + /// Each successive [`PushDebugGroup`] or [`InsertDebugMarker`] + /// instruction consumes the next `len` bytes from this vector. + pub string_data: Vec<u8>, + + /// Data used by `SetPushConstant` instructions. + /// + /// See the documentation for [`RenderCommand::SetPushConstant`] + /// and [`ComputeCommand::SetPushConstant`] for details. 
+ pub push_constant_data: Vec<u32>, +} + +impl<C: Clone> BasePass<C> { + fn new(label: &Label) -> Self { + Self { + label: label.as_ref().map(|cow| cow.to_string()), + commands: Vec::new(), + dynamic_offsets: Vec::new(), + string_data: Vec::new(), + push_constant_data: Vec::new(), + } + } + + #[cfg(feature = "trace")] + fn from_ref(base: BasePassRef<C>) -> Self { + Self { + label: base.label.map(str::to_string), + commands: base.commands.to_vec(), + dynamic_offsets: base.dynamic_offsets.to_vec(), + string_data: base.string_data.to_vec(), + push_constant_data: base.push_constant_data.to_vec(), + } + } + + pub fn as_ref(&self) -> BasePassRef<C> { + BasePassRef { + label: self.label.as_deref(), + commands: &self.commands, + dynamic_offsets: &self.dynamic_offsets, + string_data: &self.string_data, + push_constant_data: &self.push_constant_data, + } + } +} + +#[derive(Clone, Debug, Error)] +pub enum CommandEncoderError { + #[error("command encoder is invalid")] + Invalid, + #[error("command encoder must be active")] + NotRecording, +} + +impl<G: GlobalIdentityHandlerFactory> Global<G> { + pub fn command_encoder_finish<A: HalApi>( + &self, + encoder_id: id::CommandEncoderId, + _desc: &wgt::CommandBufferDescriptor<Label>, + ) -> (id::CommandBufferId, Option<CommandEncoderError>) { + profiling::scope!("CommandEncoder::finish"); + + let hub = A::hub(self); + let mut token = Token::root(); + let (mut cmd_buf_guard, _) = hub.command_buffers.write(&mut token); + + let error = match cmd_buf_guard.get_mut(encoder_id) { + Ok(cmd_buf) => match cmd_buf.status { + CommandEncoderStatus::Recording => { + cmd_buf.encoder.close(); + cmd_buf.status = CommandEncoderStatus::Finished; + //Note: if we want to stop tracking the swapchain texture view, + // this is the place to do it. 
+ log::trace!("Command buffer {:?}", encoder_id); + None + } + CommandEncoderStatus::Finished => Some(CommandEncoderError::NotRecording), + CommandEncoderStatus::Error => { + cmd_buf.encoder.discard(); + Some(CommandEncoderError::Invalid) + } + }, + Err(_) => Some(CommandEncoderError::Invalid), + }; + + (encoder_id, error) + } + + pub fn command_encoder_push_debug_group<A: HalApi>( + &self, + encoder_id: id::CommandEncoderId, + label: &str, + ) -> Result<(), CommandEncoderError> { + profiling::scope!("CommandEncoder::push_debug_group"); + + let hub = A::hub(self); + let mut token = Token::root(); + + let (mut cmd_buf_guard, _) = hub.command_buffers.write(&mut token); + let cmd_buf = CommandBuffer::get_encoder_mut(&mut *cmd_buf_guard, encoder_id)?; + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf.commands { + list.push(TraceCommand::PushDebugGroup(label.to_string())); + } + + let cmd_buf_raw = cmd_buf.encoder.open(); + unsafe { + cmd_buf_raw.begin_debug_marker(label); + } + Ok(()) + } + + pub fn command_encoder_insert_debug_marker<A: HalApi>( + &self, + encoder_id: id::CommandEncoderId, + label: &str, + ) -> Result<(), CommandEncoderError> { + profiling::scope!("CommandEncoder::insert_debug_marker"); + + let hub = A::hub(self); + let mut token = Token::root(); + + let (mut cmd_buf_guard, _) = hub.command_buffers.write(&mut token); + let cmd_buf = CommandBuffer::get_encoder_mut(&mut *cmd_buf_guard, encoder_id)?; + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf.commands { + list.push(TraceCommand::InsertDebugMarker(label.to_string())); + } + + let cmd_buf_raw = cmd_buf.encoder.open(); + unsafe { + cmd_buf_raw.insert_debug_marker(label); + } + Ok(()) + } + + pub fn command_encoder_pop_debug_group<A: HalApi>( + &self, + encoder_id: id::CommandEncoderId, + ) -> Result<(), CommandEncoderError> { + profiling::scope!("CommandEncoder::pop_debug_marker"); + + let hub = A::hub(self); + let mut token = Token::root(); + + let (mut 
cmd_buf_guard, _) = hub.command_buffers.write(&mut token); + let cmd_buf = CommandBuffer::get_encoder_mut(&mut *cmd_buf_guard, encoder_id)?; + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf.commands { + list.push(TraceCommand::PopDebugGroup); + } + + let cmd_buf_raw = cmd_buf.encoder.open(); + unsafe { + cmd_buf_raw.end_debug_marker(); + } + Ok(()) + } +} + +fn push_constant_clear<PushFn>(offset: u32, size_bytes: u32, mut push_fn: PushFn) +where + PushFn: FnMut(u32, &[u32]), +{ + let mut count_words = 0_u32; + let size_words = size_bytes / wgt::PUSH_CONSTANT_ALIGNMENT; + while count_words < size_words { + let count_bytes = count_words * wgt::PUSH_CONSTANT_ALIGNMENT; + let size_to_write_words = + (size_words - count_words).min(PUSH_CONSTANT_CLEAR_ARRAY.len() as u32); + + push_fn( + offset + count_bytes, + &PUSH_CONSTANT_CLEAR_ARRAY[0..size_to_write_words as usize], + ); + + count_words += size_to_write_words; + } +} + +#[derive(Debug, Copy, Clone)] +struct StateChange<T> { + last_state: Option<T>, +} + +impl<T: Copy + PartialEq> StateChange<T> { + fn new() -> Self { + Self { last_state: None } + } + fn set_and_check_redundant(&mut self, new_state: T) -> bool { + let already_set = self.last_state == Some(new_state); + self.last_state = Some(new_state); + already_set + } + fn reset(&mut self) { + self.last_state = None; + } +} + +impl<T: Copy + PartialEq> Default for StateChange<T> { + fn default() -> Self { + Self::new() + } +} + +#[derive(Debug)] +struct BindGroupStateChange { + last_states: [StateChange<id::BindGroupId>; hal::MAX_BIND_GROUPS], +} + +impl BindGroupStateChange { + fn new() -> Self { + Self { + last_states: [StateChange::new(); hal::MAX_BIND_GROUPS], + } + } + + unsafe fn set_and_check_redundant( + &mut self, + bind_group_id: id::BindGroupId, + index: u32, + dynamic_offsets: &mut Vec<u32>, + offsets: *const wgt::DynamicOffset, + offset_length: usize, + ) -> bool { + // For now never deduplicate bind groups with dynamic offsets. 
+ if offset_length == 0 { + // If this get returns None, that means we're well over the limit, + // so let the call through to get a proper error + if let Some(current_bind_group) = self.last_states.get_mut(index as usize) { + // Bail out if we're binding the same bind group. + if current_bind_group.set_and_check_redundant(bind_group_id) { + return true; + } + } + } else { + // We intentionally remove the memory of this bind group if we have dynamic offsets, + // such that if you try to bind this bind group later with _no_ dynamic offsets it + // tries to bind it again and gives a proper validation error. + if let Some(current_bind_group) = self.last_states.get_mut(index as usize) { + current_bind_group.reset(); + } + dynamic_offsets + .extend_from_slice(unsafe { slice::from_raw_parts(offsets, offset_length) }); + } + false + } + fn reset(&mut self) { + self.last_states = [StateChange::new(); hal::MAX_BIND_GROUPS]; + } +} + +impl Default for BindGroupStateChange { + fn default() -> Self { + Self::new() + } +} + +trait MapPassErr<T, O> { + fn map_pass_err(self, scope: PassErrorScope) -> Result<T, O>; +} + +#[derive(Clone, Copy, Debug, Error)] +pub enum PassErrorScope { + #[error("In a bundle parameter")] + Bundle, + #[error("In a pass parameter")] + Pass(id::CommandEncoderId), + #[error("In a set_bind_group command")] + SetBindGroup(id::BindGroupId), + #[error("In a set_pipeline command")] + SetPipelineRender(id::RenderPipelineId), + #[error("In a set_pipeline command")] + SetPipelineCompute(id::ComputePipelineId), + #[error("In a set_push_constant command")] + SetPushConstant, + #[error("In a set_vertex_buffer command")] + SetVertexBuffer(id::BufferId), + #[error("In a set_index_buffer command")] + SetIndexBuffer(id::BufferId), + #[error("In a set_viewport command")] + SetViewport, + #[error("In a set_scissor_rect command")] + SetScissorRect, + #[error("In a draw command, indexed:{indexed} indirect:{indirect}")] + Draw { + indexed: bool, + indirect: bool, + 
pipeline: Option<id::RenderPipelineId>, + }, + #[error("While resetting queries after the renderpass was ran")] + QueryReset, + #[error("In a write_timestamp command")] + WriteTimestamp, + #[error("In a begin_pipeline_statistics_query command")] + BeginPipelineStatisticsQuery, + #[error("In a end_pipeline_statistics_query command")] + EndPipelineStatisticsQuery, + #[error("In a execute_bundle command")] + ExecuteBundle, + #[error("In a dispatch command, indirect:{indirect}")] + Dispatch { + indirect: bool, + pipeline: Option<id::ComputePipelineId>, + }, + #[error("In a pop_debug_group command")] + PopDebugGroup, +} + +impl PrettyError for PassErrorScope { + fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { + // This error is not in the error chain, only notes are needed + match *self { + Self::Pass(id) => { + fmt.command_buffer_label(&id); + } + Self::SetBindGroup(id) => { + fmt.bind_group_label(&id); + } + Self::SetPipelineRender(id) => { + fmt.render_pipeline_label(&id); + } + Self::SetPipelineCompute(id) => { + fmt.compute_pipeline_label(&id); + } + Self::SetVertexBuffer(id) => { + fmt.buffer_label(&id); + } + Self::SetIndexBuffer(id) => { + fmt.buffer_label(&id); + } + Self::Draw { + pipeline: Some(id), .. + } => { + fmt.render_pipeline_label(&id); + } + Self::Dispatch { + pipeline: Some(id), .. 
+ } => { + fmt.compute_pipeline_label(&id); + } + _ => {} + } + } +} diff --git a/third_party/rust/wgpu-core/src/command/query.rs b/third_party/rust/wgpu-core/src/command/query.rs new file mode 100644 index 0000000000..d4176195a8 --- /dev/null +++ b/third_party/rust/wgpu-core/src/command/query.rs @@ -0,0 +1,428 @@ +use hal::CommandEncoder as _; + +#[cfg(feature = "trace")] +use crate::device::trace::Command as TraceCommand; +use crate::{ + command::{CommandBuffer, CommandEncoderError}, + hub::{Global, GlobalIdentityHandlerFactory, HalApi, Storage, Token}, + id::{self, Id, TypedId}, + init_tracker::MemoryInitKind, + resource::QuerySet, + Epoch, FastHashMap, Index, +}; +use std::{iter, marker::PhantomData}; +use thiserror::Error; +use wgt::BufferAddress; + +#[derive(Debug)] +pub(super) struct QueryResetMap<A: hal::Api> { + map: FastHashMap<Index, (Vec<bool>, Epoch)>, + _phantom: PhantomData<A>, +} +impl<A: hal::Api> QueryResetMap<A> { + pub fn new() -> Self { + Self { + map: FastHashMap::default(), + _phantom: PhantomData, + } + } + + pub fn use_query_set( + &mut self, + id: id::QuerySetId, + query_set: &QuerySet<A>, + query: u32, + ) -> bool { + let (index, epoch, _) = id.unzip(); + let vec_pair = self + .map + .entry(index) + .or_insert_with(|| (vec![false; query_set.desc.count as usize], epoch)); + + std::mem::replace(&mut vec_pair.0[query as usize], true) + } + + pub fn reset_queries( + self, + raw_encoder: &mut A::CommandEncoder, + query_set_storage: &Storage<QuerySet<A>, id::QuerySetId>, + backend: wgt::Backend, + ) -> Result<(), id::QuerySetId> { + for (query_set_id, (state, epoch)) in self.map.into_iter() { + let id = Id::zip(query_set_id, epoch, backend); + let query_set = query_set_storage.get(id).map_err(|_| id)?; + + debug_assert_eq!(state.len(), query_set.desc.count as usize); + + // Need to find all "runs" of values which need resets. If the state vector is: + // [false, true, true, false, true], we want to reset [1..3, 4..5]. 
This minimizes + // the amount of resets needed. + let mut run_start: Option<u32> = None; + for (idx, value) in state.into_iter().chain(iter::once(false)).enumerate() { + match (run_start, value) { + // We're inside of a run, do nothing + (Some(..), true) => {} + // We've hit the end of a run, dispatch a reset + (Some(start), false) => { + run_start = None; + unsafe { raw_encoder.reset_queries(&query_set.raw, start..idx as u32) }; + } + // We're starting a run + (None, true) => { + run_start = Some(idx as u32); + } + // We're in a run of falses, do nothing. + (None, false) => {} + } + } + } + + Ok(()) + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum SimplifiedQueryType { + Occlusion, + Timestamp, + PipelineStatistics, +} +impl From<wgt::QueryType> for SimplifiedQueryType { + fn from(q: wgt::QueryType) -> Self { + match q { + wgt::QueryType::Occlusion => SimplifiedQueryType::Occlusion, + wgt::QueryType::Timestamp => SimplifiedQueryType::Timestamp, + wgt::QueryType::PipelineStatistics(..) 
=> SimplifiedQueryType::PipelineStatistics, + } + } +} + +/// Error encountered when dealing with queries +#[derive(Clone, Debug, Error)] +pub enum QueryError { + #[error(transparent)] + Encoder(#[from] CommandEncoderError), + #[error("Error encountered while trying to use queries")] + Use(#[from] QueryUseError), + #[error("Error encountered while trying to resolve a query")] + Resolve(#[from] ResolveError), + #[error("Buffer {0:?} is invalid or destroyed")] + InvalidBuffer(id::BufferId), + #[error("QuerySet {0:?} is invalid or destroyed")] + InvalidQuerySet(id::QuerySetId), +} + +impl crate::error::PrettyError for QueryError { + fn fmt_pretty(&self, fmt: &mut crate::error::ErrorFormatter) { + fmt.error(self); + match *self { + Self::InvalidBuffer(id) => fmt.buffer_label(&id), + Self::InvalidQuerySet(id) => fmt.query_set_label(&id), + + _ => {} + } + } +} + +/// Error encountered while trying to use queries +#[derive(Clone, Debug, Error)] +pub enum QueryUseError { + #[error("Query {query_index} is out of bounds for a query set of size {query_set_size}")] + OutOfBounds { + query_index: u32, + query_set_size: u32, + }, + #[error("Query {query_index} has already been used within the same renderpass. Queries must only be used once per renderpass")] + UsedTwiceInsideRenderpass { query_index: u32 }, + #[error("Query {new_query_index} was started while query {active_query_index} was already active. No more than one statistic or occlusion query may be active at once")] + AlreadyStarted { + active_query_index: u32, + new_query_index: u32, + }, + #[error("Query was stopped while there was no active query")] + AlreadyStopped, + #[error("A query of type {query_type:?} was started using a query set of type {set_type:?}")] + IncompatibleType { + set_type: SimplifiedQueryType, + query_type: SimplifiedQueryType, + }, +} + +/// Error encountered while trying to resolve a query. 
+#[derive(Clone, Debug, Error)] +pub enum ResolveError { + #[error("Queries can only be resolved to buffers that contain the COPY_DST usage")] + MissingBufferUsage, + #[error("Resolve buffer offset has to be aligned to `QUERY_RESOLVE_BUFFER_ALIGNMENT")] + BufferOffsetAlignment, + #[error("Resolving queries {start_query}..{end_query} would overrun the query set of size {query_set_size}")] + QueryOverrun { + start_query: u32, + end_query: u32, + query_set_size: u32, + }, + #[error("Resolving queries {start_query}..{end_query} ({stride} byte queries) will end up overrunning the bounds of the destination buffer of size {buffer_size} using offsets {buffer_start_offset}..{buffer_end_offset}")] + BufferOverrun { + start_query: u32, + end_query: u32, + stride: u32, + buffer_size: BufferAddress, + buffer_start_offset: BufferAddress, + buffer_end_offset: BufferAddress, + }, +} + +impl<A: HalApi> QuerySet<A> { + fn validate_query( + &self, + query_set_id: id::QuerySetId, + query_type: SimplifiedQueryType, + query_index: u32, + reset_state: Option<&mut QueryResetMap<A>>, + ) -> Result<&A::QuerySet, QueryUseError> { + // We need to defer our resets because we are in a renderpass, + // add the usage to the reset map. 
+ if let Some(reset) = reset_state { + let used = reset.use_query_set(query_set_id, self, query_index); + if used { + return Err(QueryUseError::UsedTwiceInsideRenderpass { query_index }); + } + } + + let simple_set_type = SimplifiedQueryType::from(self.desc.ty); + if simple_set_type != query_type { + return Err(QueryUseError::IncompatibleType { + query_type, + set_type: simple_set_type, + }); + } + + if query_index >= self.desc.count { + return Err(QueryUseError::OutOfBounds { + query_index, + query_set_size: self.desc.count, + }); + } + + Ok(&self.raw) + } + + pub(super) fn validate_and_write_timestamp( + &self, + raw_encoder: &mut A::CommandEncoder, + query_set_id: id::QuerySetId, + query_index: u32, + reset_state: Option<&mut QueryResetMap<A>>, + ) -> Result<(), QueryUseError> { + let needs_reset = reset_state.is_none(); + let query_set = self.validate_query( + query_set_id, + SimplifiedQueryType::Timestamp, + query_index, + reset_state, + )?; + + unsafe { + // If we don't have a reset state tracker which can defer resets, we must reset now. 
+ if needs_reset { + raw_encoder.reset_queries(&self.raw, query_index..(query_index + 1)); + } + raw_encoder.write_timestamp(query_set, query_index); + } + + Ok(()) + } + + pub(super) fn validate_and_begin_pipeline_statistics_query( + &self, + raw_encoder: &mut A::CommandEncoder, + query_set_id: id::QuerySetId, + query_index: u32, + reset_state: Option<&mut QueryResetMap<A>>, + active_query: &mut Option<(id::QuerySetId, u32)>, + ) -> Result<(), QueryUseError> { + let needs_reset = reset_state.is_none(); + let query_set = self.validate_query( + query_set_id, + SimplifiedQueryType::PipelineStatistics, + query_index, + reset_state, + )?; + + if let Some((_old_id, old_idx)) = active_query.replace((query_set_id, query_index)) { + return Err(QueryUseError::AlreadyStarted { + active_query_index: old_idx, + new_query_index: query_index, + }); + } + + unsafe { + // If we don't have a reset state tracker which can defer resets, we must reset now. + if needs_reset { + raw_encoder.reset_queries(&self.raw, query_index..(query_index + 1)); + } + raw_encoder.begin_query(query_set, query_index); + } + + Ok(()) + } +} + +pub(super) fn end_pipeline_statistics_query<A: HalApi>( + raw_encoder: &mut A::CommandEncoder, + storage: &Storage<QuerySet<A>, id::QuerySetId>, + active_query: &mut Option<(id::QuerySetId, u32)>, +) -> Result<(), QueryUseError> { + if let Some((query_set_id, query_index)) = active_query.take() { + // We can unwrap here as the validity was validated when the active query was set + let query_set = storage.get(query_set_id).unwrap(); + + unsafe { raw_encoder.end_query(&query_set.raw, query_index) }; + + Ok(()) + } else { + Err(QueryUseError::AlreadyStopped) + } +} + +impl<G: GlobalIdentityHandlerFactory> Global<G> { + pub fn command_encoder_write_timestamp<A: HalApi>( + &self, + command_encoder_id: id::CommandEncoderId, + query_set_id: id::QuerySetId, + query_index: u32, + ) -> Result<(), QueryError> { + let hub = A::hub(self); + let mut token = Token::root(); + + 
let (mut cmd_buf_guard, mut token) = hub.command_buffers.write(&mut token); + let (query_set_guard, _) = hub.query_sets.read(&mut token); + + let cmd_buf = CommandBuffer::get_encoder_mut(&mut cmd_buf_guard, command_encoder_id)?; + let raw_encoder = cmd_buf.encoder.open(); + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf.commands { + list.push(TraceCommand::WriteTimestamp { + query_set_id, + query_index, + }); + } + + let query_set = cmd_buf + .trackers + .query_sets + .add_single(&*query_set_guard, query_set_id) + .ok_or(QueryError::InvalidQuerySet(query_set_id))?; + + query_set.validate_and_write_timestamp(raw_encoder, query_set_id, query_index, None)?; + + Ok(()) + } + + pub fn command_encoder_resolve_query_set<A: HalApi>( + &self, + command_encoder_id: id::CommandEncoderId, + query_set_id: id::QuerySetId, + start_query: u32, + query_count: u32, + destination: id::BufferId, + destination_offset: BufferAddress, + ) -> Result<(), QueryError> { + let hub = A::hub(self); + let mut token = Token::root(); + + let (mut cmd_buf_guard, mut token) = hub.command_buffers.write(&mut token); + let (query_set_guard, mut token) = hub.query_sets.read(&mut token); + let (buffer_guard, _) = hub.buffers.read(&mut token); + + let cmd_buf = CommandBuffer::get_encoder_mut(&mut cmd_buf_guard, command_encoder_id)?; + let raw_encoder = cmd_buf.encoder.open(); + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf.commands { + list.push(TraceCommand::ResolveQuerySet { + query_set_id, + start_query, + query_count, + destination, + destination_offset, + }); + } + + if destination_offset % wgt::QUERY_RESOLVE_BUFFER_ALIGNMENT != 0 { + return Err(QueryError::Resolve(ResolveError::BufferOffsetAlignment)); + } + + let query_set = cmd_buf + .trackers + .query_sets + .add_single(&*query_set_guard, query_set_id) + .ok_or(QueryError::InvalidQuerySet(query_set_id))?; + + let (dst_buffer, dst_pending) = cmd_buf + .trackers + .buffers + .set_single(&*buffer_guard, 
destination, hal::BufferUses::COPY_DST) + .ok_or(QueryError::InvalidBuffer(destination))?; + let dst_barrier = dst_pending.map(|pending| pending.into_hal(dst_buffer)); + + if !dst_buffer.usage.contains(wgt::BufferUsages::COPY_DST) { + return Err(ResolveError::MissingBufferUsage.into()); + } + + let end_query = start_query + query_count; + if end_query > query_set.desc.count { + return Err(ResolveError::QueryOverrun { + start_query, + end_query, + query_set_size: query_set.desc.count, + } + .into()); + } + + let elements_per_query = match query_set.desc.ty { + wgt::QueryType::Occlusion => 1, + wgt::QueryType::PipelineStatistics(ps) => ps.bits().count_ones(), + wgt::QueryType::Timestamp => 1, + }; + let stride = elements_per_query * wgt::QUERY_SIZE; + let bytes_used = (stride * query_count) as BufferAddress; + + let buffer_start_offset = destination_offset; + let buffer_end_offset = buffer_start_offset + bytes_used; + + if buffer_end_offset > dst_buffer.size { + return Err(ResolveError::BufferOverrun { + start_query, + end_query, + stride, + buffer_size: dst_buffer.size, + buffer_start_offset, + buffer_end_offset, + } + .into()); + } + + cmd_buf + .buffer_memory_init_actions + .extend(dst_buffer.initialization_status.create_action( + destination, + buffer_start_offset..buffer_end_offset, + MemoryInitKind::ImplicitlyInitialized, + )); + + unsafe { + raw_encoder.transition_buffers(dst_barrier.into_iter()); + raw_encoder.copy_query_results( + &query_set.raw, + start_query..end_query, + dst_buffer.raw.as_ref().unwrap(), + destination_offset, + wgt::BufferSize::new_unchecked(stride as u64), + ); + } + + Ok(()) + } +} diff --git a/third_party/rust/wgpu-core/src/command/render.rs b/third_party/rust/wgpu-core/src/command/render.rs new file mode 100644 index 0000000000..04e27ec062 --- /dev/null +++ b/third_party/rust/wgpu-core/src/command/render.rs @@ -0,0 +1,2493 @@ +use crate::{ + binding_model::BindError, + command::{ + self, + bind::Binder, + 
end_pipeline_statistics_query, + memory_init::{fixup_discarded_surfaces, SurfacesInDiscardState}, + BasePass, BasePassRef, BindGroupStateChange, CommandBuffer, CommandEncoderError, + CommandEncoderStatus, DrawError, ExecutionError, MapPassErr, PassErrorScope, QueryResetMap, + QueryUseError, RenderCommand, RenderCommandError, StateChange, + }, + device::{ + AttachmentData, Device, MissingDownlevelFlags, MissingFeatures, + RenderPassCompatibilityError, RenderPassContext, + }, + error::{ErrorFormatter, PrettyError}, + hub::{Global, GlobalIdentityHandlerFactory, HalApi, Storage, Token}, + id, + init_tracker::{MemoryInitKind, TextureInitRange, TextureInitTrackerAction}, + pipeline::{self, PipelineFlags}, + resource::{self, Buffer, Texture, TextureView}, + track::{TextureSelector, UsageConflict, UsageScope}, + validation::{ + check_buffer_usage, check_texture_usage, MissingBufferUsageError, MissingTextureUsageError, + }, + Label, Stored, +}; + +use arrayvec::ArrayVec; +use hal::CommandEncoder as _; +use thiserror::Error; +use wgt::{ + BufferAddress, BufferSize, BufferUsages, Color, IndexFormat, TextureUsages, + TextureViewDimension, VertexStepMode, +}; + +#[cfg(any(feature = "serial-pass", feature = "replay"))] +use serde::Deserialize; +#[cfg(any(feature = "serial-pass", feature = "trace"))] +use serde::Serialize; + +use std::{borrow::Cow, fmt, iter, marker::PhantomData, mem, num::NonZeroU32, ops::Range, str}; + +use super::{memory_init::TextureSurfaceDiscard, CommandBufferTextureMemoryActions}; + +/// Operation to perform to the output attachment at the start of a renderpass. +#[repr(C)] +#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)] +#[cfg_attr(any(feature = "serial-pass", feature = "trace"), derive(Serialize))] +#[cfg_attr(any(feature = "serial-pass", feature = "replay"), derive(Deserialize))] +#[cfg_attr(feature = "serde", serde(rename_all = "kebab-case"))] +pub enum LoadOp { + /// Clear the output attachment with the clear color. 
Clearing is faster than loading. + Clear = 0, + /// Do not clear output attachment. + Load = 1, +} + +/// Operation to perform to the output attachment at the end of a renderpass. +#[repr(C)] +#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)] +#[cfg_attr(any(feature = "serial-pass", feature = "trace"), derive(Serialize))] +#[cfg_attr(any(feature = "serial-pass", feature = "replay"), derive(Deserialize))] +#[cfg_attr(feature = "serde", serde(rename_all = "kebab-case"))] +pub enum StoreOp { + /// Discards the content of the render target. + /// + /// If you don't care about the contents of the target, this can be faster. + Discard = 0, + /// Store the result of the renderpass. + Store = 1, +} + +/// Describes an individual channel within a render pass, such as color, depth, or stencil. +#[repr(C)] +#[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(any(feature = "serial-pass", feature = "trace"), derive(Serialize))] +#[cfg_attr(any(feature = "serial-pass", feature = "replay"), derive(Deserialize))] +pub struct PassChannel<V> { + /// Operation to perform to the output attachment at the start of a + /// renderpass. + /// + /// This must be clear if it is the first renderpass rendering to a swap + /// chain image. + pub load_op: LoadOp, + /// Operation to perform to the output attachment at the end of a renderpass. + pub store_op: StoreOp, + /// If load_op is [`LoadOp::Clear`], the attachment will be cleared to this + /// color. + pub clear_value: V, + /// If true, the relevant channel is not changed by a renderpass, and the + /// corresponding attachment can be used inside the pass by other read-only + /// usages. 
+ pub read_only: bool, +} + +impl<V> PassChannel<V> { + fn hal_ops(&self) -> hal::AttachmentOps { + let mut ops = hal::AttachmentOps::empty(); + match self.load_op { + LoadOp::Load => ops |= hal::AttachmentOps::LOAD, + LoadOp::Clear => (), + }; + match self.store_op { + StoreOp::Store => ops |= hal::AttachmentOps::STORE, + StoreOp::Discard => (), + }; + ops + } +} + +/// Describes a color attachment to a render pass. +#[repr(C)] +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr(any(feature = "serial-pass", feature = "trace"), derive(Serialize))] +#[cfg_attr(any(feature = "serial-pass", feature = "replay"), derive(Deserialize))] +pub struct RenderPassColorAttachment { + /// The view to use as an attachment. + pub view: id::TextureViewId, + /// The view that will receive the resolved output if multisampling is used. + pub resolve_target: Option<id::TextureViewId>, + /// What operations will be performed on this color attachment. + pub channel: PassChannel<Color>, +} + +/// Describes a depth/stencil attachment to a render pass. +#[repr(C)] +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr(any(feature = "serial-pass", feature = "trace"), derive(Serialize))] +#[cfg_attr(any(feature = "serial-pass", feature = "replay"), derive(Deserialize))] +pub struct RenderPassDepthStencilAttachment { + /// The view to use as an attachment. + pub view: id::TextureViewId, + /// What operations will be performed on the depth part of the attachment. + pub depth: PassChannel<f32>, + /// What operations will be performed on the stencil part of the attachment. + pub stencil: PassChannel<u32>, +} + +impl RenderPassDepthStencilAttachment { + /// Validate the given aspects' read-only flags against their load + /// and store ops. + /// + /// When an aspect is read-only, its load and store ops must be + /// `LoadOp::Load` and `StoreOp::Store`. + /// + /// On success, return a pair `(depth, stencil)` indicating + /// whether the depth and stencil passes are read-only. 
+ fn depth_stencil_read_only( + &self, + aspects: hal::FormatAspects, + ) -> Result<(bool, bool), RenderPassErrorInner> { + let mut depth_read_only = true; + let mut stencil_read_only = true; + + if aspects.contains(hal::FormatAspects::DEPTH) { + if self.depth.read_only + && (self.depth.load_op, self.depth.store_op) != (LoadOp::Load, StoreOp::Store) + { + return Err(RenderPassErrorInner::InvalidDepthOps); + } + depth_read_only = self.depth.read_only; + } + + if aspects.contains(hal::FormatAspects::STENCIL) { + if self.stencil.read_only + && (self.stencil.load_op, self.stencil.store_op) != (LoadOp::Load, StoreOp::Store) + { + return Err(RenderPassErrorInner::InvalidStencilOps); + } + stencil_read_only = self.stencil.read_only; + } + + Ok((depth_read_only, stencil_read_only)) + } +} + +/// Describes the attachments of a render pass. +#[derive(Clone, Debug, Default, PartialEq)] +pub struct RenderPassDescriptor<'a> { + pub label: Label<'a>, + /// The color attachments of the render pass. + pub color_attachments: Cow<'a, [Option<RenderPassColorAttachment>]>, + /// The depth and stencil attachment of the render pass, if any. + pub depth_stencil_attachment: Option<&'a RenderPassDepthStencilAttachment>, +} + +#[cfg_attr(feature = "serial-pass", derive(Deserialize, Serialize))] +pub struct RenderPass { + base: BasePass<RenderCommand>, + parent_id: id::CommandEncoderId, + color_targets: ArrayVec<Option<RenderPassColorAttachment>, { hal::MAX_COLOR_ATTACHMENTS }>, + depth_stencil_target: Option<RenderPassDepthStencilAttachment>, + + // Resource binding dedupe state. 
+ #[cfg_attr(feature = "serial-pass", serde(skip))] + current_bind_groups: BindGroupStateChange, + #[cfg_attr(feature = "serial-pass", serde(skip))] + current_pipeline: StateChange<id::RenderPipelineId>, +} + +impl RenderPass { + pub fn new(parent_id: id::CommandEncoderId, desc: &RenderPassDescriptor) -> Self { + Self { + base: BasePass::new(&desc.label), + parent_id, + color_targets: desc.color_attachments.iter().cloned().collect(), + depth_stencil_target: desc.depth_stencil_attachment.cloned(), + + current_bind_groups: BindGroupStateChange::new(), + current_pipeline: StateChange::new(), + } + } + + pub fn parent_id(&self) -> id::CommandEncoderId { + self.parent_id + } + + #[cfg(feature = "trace")] + pub fn into_command(self) -> crate::device::trace::Command { + crate::device::trace::Command::RunRenderPass { + base: self.base, + target_colors: self.color_targets.into_iter().collect(), + target_depth_stencil: self.depth_stencil_target, + } + } + + pub fn set_index_buffer( + &mut self, + buffer_id: id::BufferId, + index_format: IndexFormat, + offset: BufferAddress, + size: Option<BufferSize>, + ) { + self.base.commands.push(RenderCommand::SetIndexBuffer { + buffer_id, + index_format, + offset, + size, + }); + } +} + +impl fmt::Debug for RenderPass { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("RenderPass") + .field("encoder_id", &self.parent_id) + .field("color_targets", &self.color_targets) + .field("depth_stencil_target", &self.depth_stencil_target) + .field("command count", &self.base.commands.len()) + .field("dynamic offset count", &self.base.dynamic_offsets.len()) + .field( + "push constant u32 count", + &self.base.push_constant_data.len(), + ) + .finish() + } +} + +#[derive(Debug, PartialEq)] +enum OptionalState { + Unused, + Required, + Set, +} + +impl OptionalState { + fn require(&mut self, require: bool) { + if require && *self == Self::Unused { + *self = Self::Required; + } + } +} + +#[derive(Debug, Default)] +struct 
IndexState { + bound_buffer_view: Option<(id::Valid<id::BufferId>, Range<BufferAddress>)>, + format: Option<IndexFormat>, + pipeline_format: Option<IndexFormat>, + limit: u32, +} + +impl IndexState { + fn update_limit(&mut self) { + self.limit = match self.bound_buffer_view { + Some((_, ref range)) => { + let format = self + .format + .expect("IndexState::update_limit must be called after a index buffer is set"); + let shift = match format { + IndexFormat::Uint16 => 1, + IndexFormat::Uint32 => 2, + }; + ((range.end - range.start) >> shift) as u32 + } + None => 0, + } + } + + fn reset(&mut self) { + self.bound_buffer_view = None; + self.limit = 0; + } +} + +#[derive(Clone, Copy, Debug)] +struct VertexBufferState { + total_size: BufferAddress, + step: pipeline::VertexStep, + bound: bool, +} + +impl VertexBufferState { + const EMPTY: Self = Self { + total_size: 0, + step: pipeline::VertexStep { + stride: 0, + mode: VertexStepMode::Vertex, + }, + bound: false, + }; +} + +#[derive(Debug, Default)] +struct VertexState { + inputs: ArrayVec<VertexBufferState, { hal::MAX_VERTEX_BUFFERS }>, + /// Length of the shortest vertex rate vertex buffer + vertex_limit: u32, + /// Buffer slot which the shortest vertex rate vertex buffer is bound to + vertex_limit_slot: u32, + /// Length of the shortest instance rate vertex buffer + instance_limit: u32, + /// Buffer slot which the shortest instance rate vertex buffer is bound to + instance_limit_slot: u32, + /// Total amount of buffers required by the pipeline. 
+ buffers_required: u32, +} + +impl VertexState { + fn update_limits(&mut self) { + self.vertex_limit = u32::MAX; + self.instance_limit = u32::MAX; + for (idx, vbs) in self.inputs.iter().enumerate() { + if vbs.step.stride == 0 || !vbs.bound { + continue; + } + let limit = (vbs.total_size / vbs.step.stride) as u32; + match vbs.step.mode { + VertexStepMode::Vertex => { + if limit < self.vertex_limit { + self.vertex_limit = limit; + self.vertex_limit_slot = idx as _; + } + } + VertexStepMode::Instance => { + if limit < self.instance_limit { + self.instance_limit = limit; + self.instance_limit_slot = idx as _; + } + } + } + } + } + + fn reset(&mut self) { + self.inputs.clear(); + self.vertex_limit = 0; + self.instance_limit = 0; + } +} + +#[derive(Debug)] +struct State { + pipeline_flags: PipelineFlags, + binder: Binder, + blend_constant: OptionalState, + stencil_reference: u32, + pipeline: Option<id::RenderPipelineId>, + index: IndexState, + vertex: VertexState, + debug_scope_depth: u32, +} + +impl State { + fn is_ready(&self, indexed: bool) -> Result<(), DrawError> { + // Determine how many vertex buffers have already been bound + let vertex_buffer_count = self.vertex.inputs.iter().take_while(|v| v.bound).count() as u32; + // Compare with the needed quantity + if vertex_buffer_count < self.vertex.buffers_required { + return Err(DrawError::MissingVertexBuffer { + index: vertex_buffer_count, + }); + } + + let bind_mask = self.binder.invalid_mask(); + if bind_mask != 0 { + //let (expected, provided) = self.binder.entries[index as usize].info(); + return Err(DrawError::IncompatibleBindGroup { + index: bind_mask.trailing_zeros(), + }); + } + if self.pipeline.is_none() { + return Err(DrawError::MissingPipeline); + } + if self.blend_constant == OptionalState::Required { + return Err(DrawError::MissingBlendConstant); + } + + if indexed { + // Pipeline expects an index buffer + if let Some(pipeline_index_format) = self.index.pipeline_format { + // We have a buffer bound + let 
buffer_index_format = self.index.format.ok_or(DrawError::MissingIndexBuffer)?; + + // The buffers are different formats + if pipeline_index_format != buffer_index_format { + return Err(DrawError::UnmatchedIndexFormats { + pipeline: pipeline_index_format, + buffer: buffer_index_format, + }); + } + } + } + + self.binder.check_late_buffer_bindings()?; + + Ok(()) + } + + /// Reset the `RenderBundle`-related states. + fn reset_bundle(&mut self) { + self.binder.reset(); + self.pipeline = None; + self.index.reset(); + self.vertex.reset(); + } +} + +/// Error encountered when performing a render pass. +#[derive(Clone, Debug, Error)] +pub enum RenderPassErrorInner { + #[error(transparent)] + Encoder(#[from] CommandEncoderError), + #[error("attachment texture view {0:?} is invalid")] + InvalidAttachment(id::TextureViewId), + #[error("attachment format {0:?} is not a color format")] + InvalidColorAttachmentFormat(wgt::TextureFormat), + #[error("attachment format {0:?} is not a depth-stencil format")] + InvalidDepthStencilAttachmentFormat(wgt::TextureFormat), + #[error("attachment format {0:?} can not be resolved")] + UnsupportedResolveTargetFormat(wgt::TextureFormat), + #[error("missing color or depth_stencil attachments, at least one is required.")] + MissingAttachments, + #[error("attachments have differing sizes: {previous:?} is followed by {mismatch:?}")] + AttachmentsDimensionMismatch { + previous: (&'static str, wgt::Extent3d), + mismatch: (&'static str, wgt::Extent3d), + }, + #[error("attachment's sample count {0} is invalid")] + InvalidSampleCount(u32), + #[error("resolve source must be multi-sampled (has {src} samples) while the resolve destination must not be multisampled (has {dst} samples)")] + InvalidResolveSampleCounts { src: u32, dst: u32 }, + #[error( + "resource source format ({src:?}) must match the resolve destination format ({dst:?})" + )] + MismatchedResolveTextureFormat { + src: wgt::TextureFormat, + dst: wgt::TextureFormat, + }, + #[error("surface 
texture is dropped before the render pass is finished")] + SurfaceTextureDropped, + #[error("not enough memory left")] + OutOfMemory, + #[error("unable to clear non-present/read-only depth")] + InvalidDepthOps, + #[error("unable to clear non-present/read-only stencil")] + InvalidStencilOps, + #[error("all attachments must have the same sample count, found {actual} != {expected}")] + SampleCountMismatch { actual: u32, expected: u32 }, + #[error("setting `values_offset` to be `None` is only for internal use in render bundles")] + InvalidValuesOffset, + #[error(transparent)] + MissingFeatures(#[from] MissingFeatures), + #[error(transparent)] + MissingDownlevelFlags(#[from] MissingDownlevelFlags), + #[error("indirect draw uses bytes {offset}..{end_offset} {} which overruns indirect buffer of size {buffer_size}", + count.map_or_else(String::new, |v| format!("(using count {})", v)))] + IndirectBufferOverrun { + count: Option<NonZeroU32>, + offset: u64, + end_offset: u64, + buffer_size: u64, + }, + #[error("indirect draw uses bytes {begin_count_offset}..{end_count_offset} which overruns indirect buffer of size {count_buffer_size}")] + IndirectCountBufferOverrun { + begin_count_offset: u64, + end_count_offset: u64, + count_buffer_size: u64, + }, + #[error("cannot pop debug group, because number of pushed debug groups is zero")] + InvalidPopDebugGroup, + #[error(transparent)] + ResourceUsageConflict(#[from] UsageConflict), + #[error("render bundle has incompatible targets, {0}")] + IncompatibleBundleTargets(#[from] RenderPassCompatibilityError), + #[error( + "render bundle has incompatible read-only flags: \ + bundle has flags depth = {bundle_depth} and stencil = {bundle_stencil}, \ + while the pass has flags depth = {pass_depth} and stencil = {pass_stencil}. \ + Read-only renderpasses are only compatible with read-only bundles for that aspect." 
+ )] + IncompatibleBundleRods { + pass_depth: bool, + pass_stencil: bool, + bundle_depth: bool, + bundle_stencil: bool, + }, + #[error(transparent)] + RenderCommand(#[from] RenderCommandError), + #[error(transparent)] + Draw(#[from] DrawError), + #[error(transparent)] + Bind(#[from] BindError), + #[error(transparent)] + QueryUse(#[from] QueryUseError), + #[error("multiview layer count must match")] + MultiViewMismatch, + #[error( + "multiview pass texture views with more than one array layer must have D2Array dimension" + )] + MultiViewDimensionMismatch, +} + +impl PrettyError for RenderPassErrorInner { + fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { + fmt.error(self); + if let Self::InvalidAttachment(id) = *self { + fmt.texture_view_label_with_key(&id, "attachment"); + }; + } +} + +impl From<MissingBufferUsageError> for RenderPassErrorInner { + fn from(error: MissingBufferUsageError) -> Self { + Self::RenderCommand(error.into()) + } +} + +impl From<MissingTextureUsageError> for RenderPassErrorInner { + fn from(error: MissingTextureUsageError) -> Self { + Self::RenderCommand(error.into()) + } +} + +/// Error encountered when performing a render pass. 
+#[derive(Clone, Debug, Error)] +#[error("{scope}")] +pub struct RenderPassError { + pub scope: PassErrorScope, + #[source] + inner: RenderPassErrorInner, +} +impl PrettyError for RenderPassError { + fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { + // This error is wrapper for the inner error, + // but the scope has useful labels + fmt.error(self); + self.scope.fmt_pretty(fmt); + } +} + +impl<T, E> MapPassErr<T, RenderPassError> for Result<T, E> +where + E: Into<RenderPassErrorInner>, +{ + fn map_pass_err(self, scope: PassErrorScope) -> Result<T, RenderPassError> { + self.map_err(|inner| RenderPassError { + scope, + inner: inner.into(), + }) + } +} + +struct RenderAttachment<'a> { + texture_id: &'a Stored<id::TextureId>, + selector: &'a TextureSelector, + usage: hal::TextureUses, +} + +impl<A: hal::Api> TextureView<A> { + fn to_render_attachment(&self, usage: hal::TextureUses) -> RenderAttachment { + RenderAttachment { + texture_id: &self.parent_id, + selector: &self.selector, + usage, + } + } +} + +const MAX_TOTAL_ATTACHMENTS: usize = hal::MAX_COLOR_ATTACHMENTS + hal::MAX_COLOR_ATTACHMENTS + 1; +type AttachmentDataVec<T> = ArrayVec<T, MAX_TOTAL_ATTACHMENTS>; + +struct RenderPassInfo<'a, A: HalApi> { + context: RenderPassContext, + usage_scope: UsageScope<A>, + /// All render attachments, including depth/stencil + render_attachments: AttachmentDataVec<RenderAttachment<'a>>, + is_depth_read_only: bool, + is_stencil_read_only: bool, + extent: wgt::Extent3d, + _phantom: PhantomData<A>, + + pending_discard_init_fixups: SurfacesInDiscardState, + divergent_discarded_depth_stencil_aspect: Option<(wgt::TextureAspect, &'a TextureView<A>)>, + multiview: Option<NonZeroU32>, +} + +impl<'a, A: HalApi> RenderPassInfo<'a, A> { + fn add_pass_texture_init_actions<V>( + channel: &PassChannel<V>, + texture_memory_actions: &mut CommandBufferTextureMemoryActions, + view: &TextureView<A>, + texture_guard: &Storage<Texture<A>, id::TextureId>, + pending_discard_init_fixups: &mut 
SurfacesInDiscardState, + ) { + if channel.load_op == LoadOp::Load { + pending_discard_init_fixups.extend(texture_memory_actions.register_init_action( + &TextureInitTrackerAction { + id: view.parent_id.value.0, + range: TextureInitRange::from(view.selector.clone()), + // Note that this is needed even if the target is discarded, + kind: MemoryInitKind::NeedsInitializedMemory, + }, + texture_guard, + )); + } else if channel.store_op == StoreOp::Store { + // Clear + Store + texture_memory_actions.register_implicit_init( + view.parent_id.value, + TextureInitRange::from(view.selector.clone()), + texture_guard, + ); + } + if channel.store_op == StoreOp::Discard { + // the discard happens at the *end* of a pass, but recording the + // discard right away be alright since the texture can't be used + // during the pass anyways + texture_memory_actions.discard(TextureSurfaceDiscard { + texture: view.parent_id.value.0, + mip_level: view.selector.mips.start, + layer: view.selector.layers.start, + }); + } + } + + fn start( + device: &Device<A>, + label: Option<&str>, + color_attachments: &[Option<RenderPassColorAttachment>], + depth_stencil_attachment: Option<&RenderPassDepthStencilAttachment>, + cmd_buf: &mut CommandBuffer<A>, + view_guard: &'a Storage<TextureView<A>, id::TextureViewId>, + buffer_guard: &'a Storage<Buffer<A>, id::BufferId>, + texture_guard: &'a Storage<Texture<A>, id::TextureId>, + ) -> Result<Self, RenderPassErrorInner> { + profiling::scope!("RenderPassInfo::start"); + + // We default to false intentionally, even if depth-stencil isn't used at all. + // This allows us to use the primary raw pipeline in `RenderPipeline`, + // instead of the special read-only one, which would be `None`. 
+ let mut is_depth_read_only = false; + let mut is_stencil_read_only = false; + + let mut render_attachments = AttachmentDataVec::<RenderAttachment>::new(); + let mut discarded_surfaces = AttachmentDataVec::new(); + let mut pending_discard_init_fixups = SurfacesInDiscardState::new(); + let mut divergent_discarded_depth_stencil_aspect = None; + + let mut attachment_type_name = ""; + let mut extent = None; + let mut sample_count = 0; + + let mut detected_multiview: Option<Option<NonZeroU32>> = None; + + let mut check_multiview = |view: &TextureView<A>| { + // Get the multiview configuration for this texture view + let layers = view.selector.layers.end - view.selector.layers.start; + let this_multiview = if layers >= 2 { + // Trivially proven by the if above + Some(unsafe { NonZeroU32::new_unchecked(layers) }) + } else { + None + }; + + // Make sure that if this view is a multiview, it is set to be an array + if this_multiview.is_some() && view.desc.dimension != TextureViewDimension::D2Array { + return Err(RenderPassErrorInner::MultiViewDimensionMismatch); + } + + // Validate matching first, or store the first one + if let Some(multiview) = detected_multiview { + if multiview != this_multiview { + return Err(RenderPassErrorInner::MultiViewMismatch); + } + } else { + // Multiview is only supported if the feature is enabled + if this_multiview.is_some() { + device.require_features(wgt::Features::MULTIVIEW)?; + } + + detected_multiview = Some(this_multiview); + } + + Ok(()) + }; + let mut add_view = |view: &TextureView<A>, type_name| { + if let Some(ex) = extent { + if ex != view.extent { + return Err(RenderPassErrorInner::AttachmentsDimensionMismatch { + previous: (attachment_type_name, ex), + mismatch: (type_name, view.extent), + }); + } + } else { + extent = Some(view.extent); + } + if sample_count == 0 { + sample_count = view.samples; + } else if sample_count != view.samples { + return Err(RenderPassErrorInner::SampleCountMismatch { + actual: view.samples, + 
expected: sample_count, + }); + } + attachment_type_name = type_name; + Ok(()) + }; + + let mut colors = + ArrayVec::<Option<hal::ColorAttachment<A>>, { hal::MAX_COLOR_ATTACHMENTS }>::new(); + let mut depth_stencil = None; + + if let Some(at) = depth_stencil_attachment { + let view: &TextureView<A> = cmd_buf + .trackers + .views + .add_single(view_guard, at.view) + .ok_or(RenderPassErrorInner::InvalidAttachment(at.view))?; + check_multiview(view)?; + add_view(view, "depth")?; + + let ds_aspects = view.desc.aspects(); + if ds_aspects.contains(hal::FormatAspects::COLOR) { + return Err(RenderPassErrorInner::InvalidDepthStencilAttachmentFormat( + view.desc.format, + )); + } + + if !ds_aspects.contains(hal::FormatAspects::STENCIL) + || (at.stencil.load_op == at.depth.load_op + && at.stencil.store_op == at.depth.store_op) + { + Self::add_pass_texture_init_actions( + &at.depth, + &mut cmd_buf.texture_memory_actions, + view, + texture_guard, + &mut pending_discard_init_fixups, + ); + } else if !ds_aspects.contains(hal::FormatAspects::DEPTH) { + Self::add_pass_texture_init_actions( + &at.stencil, + &mut cmd_buf.texture_memory_actions, + view, + texture_guard, + &mut pending_discard_init_fixups, + ); + } else { + // This is the only place (anywhere in wgpu) where Stencil & + // Depth init state can diverge. + // + // To safe us the overhead of tracking init state of texture + // aspects everywhere, we're going to cheat a little bit in + // order to keep the init state of both Stencil and Depth + // aspects in sync. The expectation is that we hit this path + // extremely rarely! + // + // Diverging LoadOp, i.e. Load + Clear: + // + // Record MemoryInitKind::NeedsInitializedMemory for the entire + // surface, a bit wasteful on unit but no negative effect! + // + // Rationale: If the loaded channel is uninitialized it needs + // clearing, the cleared channel doesn't care. 
(If everything is + // already initialized nothing special happens) + // + // (possible minor optimization: Clear caused by + // NeedsInitializedMemory should know that it doesn't need to + // clear the aspect that was set to C) + let need_init_beforehand = + at.depth.load_op == LoadOp::Load || at.stencil.load_op == LoadOp::Load; + if need_init_beforehand { + pending_discard_init_fixups.extend( + cmd_buf.texture_memory_actions.register_init_action( + &TextureInitTrackerAction { + id: view.parent_id.value.0, + range: TextureInitRange::from(view.selector.clone()), + kind: MemoryInitKind::NeedsInitializedMemory, + }, + texture_guard, + ), + ); + } + + // Diverging Store, i.e. Discard + Store: + // + // Immediately zero out channel that is set to discard after + // we're done with the render pass. This allows us to set the + // entire surface to MemoryInitKind::ImplicitlyInitialized (if + // it isn't already set to NeedsInitializedMemory). + // + // (possible optimization: Delay and potentially drop this zeroing) + if at.depth.store_op != at.stencil.store_op { + if !need_init_beforehand { + cmd_buf.texture_memory_actions.register_implicit_init( + view.parent_id.value, + TextureInitRange::from(view.selector.clone()), + texture_guard, + ); + } + divergent_discarded_depth_stencil_aspect = Some(( + if at.depth.store_op == StoreOp::Discard { + wgt::TextureAspect::DepthOnly + } else { + wgt::TextureAspect::StencilOnly + }, + view, + )); + } else if at.depth.store_op == StoreOp::Discard { + // Both are discarded using the regular path. 
+ discarded_surfaces.push(TextureSurfaceDiscard { + texture: view.parent_id.value.0, + mip_level: view.selector.mips.start, + layer: view.selector.layers.start, + }); + } + } + + (is_depth_read_only, is_stencil_read_only) = at.depth_stencil_read_only(ds_aspects)?; + + let usage = if is_depth_read_only && is_stencil_read_only { + hal::TextureUses::DEPTH_STENCIL_READ | hal::TextureUses::RESOURCE + } else { + hal::TextureUses::DEPTH_STENCIL_WRITE + }; + render_attachments.push(view.to_render_attachment(usage)); + + depth_stencil = Some(hal::DepthStencilAttachment { + target: hal::Attachment { + view: &view.raw, + usage, + }, + depth_ops: at.depth.hal_ops(), + stencil_ops: at.stencil.hal_ops(), + clear_value: (at.depth.clear_value, at.stencil.clear_value), + }); + } + + for at in color_attachments { + let at = if let Some(attachment) = at.as_ref() { + attachment + } else { + colors.push(None); + continue; + }; + let color_view: &TextureView<A> = cmd_buf + .trackers + .views + .add_single(view_guard, at.view) + .ok_or(RenderPassErrorInner::InvalidAttachment(at.view))?; + check_multiview(color_view)?; + add_view(color_view, "color")?; + + if !color_view + .desc + .aspects() + .contains(hal::FormatAspects::COLOR) + { + return Err(RenderPassErrorInner::InvalidColorAttachmentFormat( + color_view.desc.format, + )); + } + + Self::add_pass_texture_init_actions( + &at.channel, + &mut cmd_buf.texture_memory_actions, + color_view, + texture_guard, + &mut pending_discard_init_fixups, + ); + render_attachments + .push(color_view.to_render_attachment(hal::TextureUses::COLOR_TARGET)); + + let mut hal_resolve_target = None; + if let Some(resolve_target) = at.resolve_target { + let resolve_view: &TextureView<A> = cmd_buf + .trackers + .views + .add_single(view_guard, resolve_target) + .ok_or(RenderPassErrorInner::InvalidAttachment(resolve_target))?; + + check_multiview(resolve_view)?; + if color_view.extent != resolve_view.extent { + return 
Err(RenderPassErrorInner::AttachmentsDimensionMismatch { + previous: (attachment_type_name, extent.unwrap_or_default()), + mismatch: ("resolve", resolve_view.extent), + }); + } + if color_view.samples == 1 || resolve_view.samples != 1 { + return Err(RenderPassErrorInner::InvalidResolveSampleCounts { + src: color_view.samples, + dst: resolve_view.samples, + }); + } + if color_view.desc.format != resolve_view.desc.format { + return Err(RenderPassErrorInner::MismatchedResolveTextureFormat { + src: color_view.desc.format, + dst: resolve_view.desc.format, + }); + } + if !resolve_view + .format_features + .flags + .contains(wgt::TextureFormatFeatureFlags::MULTISAMPLE_RESOLVE) + { + return Err(RenderPassErrorInner::UnsupportedResolveTargetFormat( + resolve_view.desc.format, + )); + } + + cmd_buf.texture_memory_actions.register_implicit_init( + resolve_view.parent_id.value, + TextureInitRange::from(resolve_view.selector.clone()), + texture_guard, + ); + render_attachments + .push(resolve_view.to_render_attachment(hal::TextureUses::COLOR_TARGET)); + + hal_resolve_target = Some(hal::Attachment { + view: &resolve_view.raw, + usage: hal::TextureUses::COLOR_TARGET, + }); + } + + colors.push(Some(hal::ColorAttachment { + target: hal::Attachment { + view: &color_view.raw, + usage: hal::TextureUses::COLOR_TARGET, + }, + resolve_target: hal_resolve_target, + ops: at.channel.hal_ops(), + clear_value: at.channel.clear_value, + })); + } + + let extent = extent.ok_or(RenderPassErrorInner::MissingAttachments)?; + let multiview = detected_multiview.expect("Multiview was not detected, no attachments"); + + let view_data = AttachmentData { + colors: color_attachments + .iter() + .map(|at| at.as_ref().map(|at| view_guard.get(at.view).unwrap())) + .collect(), + resolves: color_attachments + .iter() + .filter_map(|at| match *at { + Some(RenderPassColorAttachment { + resolve_target: Some(resolve), + .. 
+ }) => Some(view_guard.get(resolve).unwrap()), + _ => None, + }) + .collect(), + depth_stencil: depth_stencil_attachment.map(|at| view_guard.get(at.view).unwrap()), + }; + + let context = RenderPassContext { + attachments: view_data.map(|view| view.desc.format), + sample_count, + multiview, + }; + + let hal_desc = hal::RenderPassDescriptor { + label, + extent, + sample_count, + color_attachments: &colors, + depth_stencil_attachment: depth_stencil, + multiview, + }; + unsafe { + cmd_buf.encoder.raw.begin_render_pass(&hal_desc); + }; + + Ok(Self { + context, + usage_scope: UsageScope::new(buffer_guard, texture_guard), + render_attachments, + is_depth_read_only, + is_stencil_read_only, + extent, + _phantom: PhantomData, + pending_discard_init_fixups, + divergent_discarded_depth_stencil_aspect, + multiview, + }) + } + + fn finish( + mut self, + raw: &mut A::CommandEncoder, + texture_guard: &Storage<Texture<A>, id::TextureId>, + ) -> Result<(UsageScope<A>, SurfacesInDiscardState), RenderPassErrorInner> { + profiling::scope!("RenderPassInfo::finish"); + unsafe { + raw.end_render_pass(); + } + + for ra in self.render_attachments { + if !texture_guard.contains(ra.texture_id.value.0) { + return Err(RenderPassErrorInner::SurfaceTextureDropped); + } + let texture = &texture_guard[ra.texture_id.value]; + check_texture_usage(texture.desc.usage, TextureUsages::RENDER_ATTACHMENT)?; + + // the tracker set of the pass is always in "extend" mode + unsafe { + self.usage_scope + .textures + .merge_single( + texture_guard, + ra.texture_id.value, + Some(ra.selector.clone()), + &ra.texture_id.ref_count, + ra.usage, + ) + .map_err(UsageConflict::from)? + }; + } + + // If either only stencil or depth was discarded, we put in a special + // clear pass to keep the init status of the aspects in sync. We do this + // so we don't need to track init state for depth/stencil aspects + // individually. 
+ // + // Note that we don't go the usual route of "brute force" initializing + // the texture when need arises here, since this path is actually + // something a user may genuinely want (where as the other cases are + // more seen along the lines as gracefully handling a user error). + if let Some((aspect, view)) = self.divergent_discarded_depth_stencil_aspect { + let (depth_ops, stencil_ops) = if aspect == wgt::TextureAspect::DepthOnly { + ( + hal::AttachmentOps::STORE, // clear depth + hal::AttachmentOps::LOAD | hal::AttachmentOps::STORE, // unchanged stencil + ) + } else { + ( + hal::AttachmentOps::LOAD | hal::AttachmentOps::STORE, // unchanged stencil + hal::AttachmentOps::STORE, // clear depth + ) + }; + let desc = hal::RenderPassDescriptor { + label: Some("(wgpu internal) Zero init discarded depth/stencil aspect"), + extent: view.extent, + sample_count: view.samples, + color_attachments: &[], + depth_stencil_attachment: Some(hal::DepthStencilAttachment { + target: hal::Attachment { + view: &view.raw, + usage: hal::TextureUses::DEPTH_STENCIL_WRITE, + }, + depth_ops, + stencil_ops, + clear_value: (0.0, 0), + }), + multiview: self.multiview, + }; + unsafe { + raw.begin_render_pass(&desc); + raw.end_render_pass(); + } + } + + Ok((self.usage_scope, self.pending_discard_init_fixups)) + } +} + +// Common routines between render/compute + +impl<G: GlobalIdentityHandlerFactory> Global<G> { + pub fn command_encoder_run_render_pass<A: HalApi>( + &self, + encoder_id: id::CommandEncoderId, + pass: &RenderPass, + ) -> Result<(), RenderPassError> { + self.command_encoder_run_render_pass_impl::<A>( + encoder_id, + pass.base.as_ref(), + &pass.color_targets, + pass.depth_stencil_target.as_ref(), + ) + } + + #[doc(hidden)] + pub fn command_encoder_run_render_pass_impl<A: HalApi>( + &self, + encoder_id: id::CommandEncoderId, + base: BasePassRef<RenderCommand>, + color_attachments: &[Option<RenderPassColorAttachment>], + depth_stencil_attachment: 
Option<&RenderPassDepthStencilAttachment>, + ) -> Result<(), RenderPassError> { + profiling::scope!("CommandEncoder::run_render_pass"); + let init_scope = PassErrorScope::Pass(encoder_id); + + let hub = A::hub(self); + let mut token = Token::root(); + let (device_guard, mut token) = hub.devices.read(&mut token); + + let (scope, query_reset_state, pending_discard_init_fixups) = { + let (mut cmb_guard, mut token) = hub.command_buffers.write(&mut token); + + // Spell out the type, to placate rust-analyzer. + // https://github.com/rust-lang/rust-analyzer/issues/12247 + let cmd_buf: &mut CommandBuffer<A> = + CommandBuffer::get_encoder_mut(&mut *cmb_guard, encoder_id) + .map_pass_err(init_scope)?; + // close everything while the new command encoder is filled + cmd_buf.encoder.close(); + // will be reset to true if recording is done without errors + cmd_buf.status = CommandEncoderStatus::Error; + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf.commands { + list.push(crate::device::trace::Command::RunRenderPass { + base: BasePass::from_ref(base), + target_colors: color_attachments.to_vec(), + target_depth_stencil: depth_stencil_attachment.cloned(), + }); + } + + let device = &device_guard[cmd_buf.device_id.value]; + cmd_buf.encoder.open_pass(base.label); + + let (bundle_guard, mut token) = hub.render_bundles.read(&mut token); + let (pipeline_layout_guard, mut token) = hub.pipeline_layouts.read(&mut token); + let (bind_group_guard, mut token) = hub.bind_groups.read(&mut token); + let (render_pipeline_guard, mut token) = hub.render_pipelines.read(&mut token); + let (query_set_guard, mut token) = hub.query_sets.read(&mut token); + let (buffer_guard, mut token) = hub.buffers.read(&mut token); + let (texture_guard, mut token) = hub.textures.read(&mut token); + let (view_guard, _) = hub.texture_views.read(&mut token); + + log::trace!( + "Encoding render pass begin in command buffer {:?}", + encoder_id + ); + + let mut info = RenderPassInfo::start( + device, + 
base.label, + color_attachments, + depth_stencil_attachment, + cmd_buf, + &*view_guard, + &*buffer_guard, + &*texture_guard, + ) + .map_pass_err(init_scope)?; + + cmd_buf.trackers.set_size( + Some(&*buffer_guard), + Some(&*texture_guard), + Some(&*view_guard), + None, + Some(&*bind_group_guard), + None, + Some(&*render_pipeline_guard), + Some(&*bundle_guard), + Some(&*query_set_guard), + ); + + let raw = &mut cmd_buf.encoder.raw; + + let mut state = State { + pipeline_flags: PipelineFlags::empty(), + binder: Binder::new(), + blend_constant: OptionalState::Unused, + stencil_reference: 0, + pipeline: None, + index: IndexState::default(), + vertex: VertexState::default(), + debug_scope_depth: 0, + }; + let mut temp_offsets = Vec::new(); + let mut dynamic_offset_count = 0; + let mut string_offset = 0; + let mut active_query = None; + let mut query_reset_state = QueryResetMap::new(); + + for command in base.commands { + match *command { + RenderCommand::SetBindGroup { + index, + num_dynamic_offsets, + bind_group_id, + } => { + let scope = PassErrorScope::SetBindGroup(bind_group_id); + let max_bind_groups = device.limits.max_bind_groups; + if (index as u32) >= max_bind_groups { + return Err(RenderCommandError::BindGroupIndexOutOfRange { + index, + max: max_bind_groups, + }) + .map_pass_err(scope); + } + + temp_offsets.clear(); + temp_offsets.extend_from_slice( + &base.dynamic_offsets[dynamic_offset_count + ..dynamic_offset_count + (num_dynamic_offsets as usize)], + ); + dynamic_offset_count += num_dynamic_offsets as usize; + + let bind_group: &crate::binding_model::BindGroup<A> = cmd_buf + .trackers + .bind_groups + .add_single(&*bind_group_guard, bind_group_id) + .ok_or(RenderCommandError::InvalidBindGroup(bind_group_id)) + .map_pass_err(scope)?; + bind_group + .validate_dynamic_bindings(index, &temp_offsets, &cmd_buf.limits) + .map_pass_err(scope)?; + + // merge the resource tracker in + unsafe { + info.usage_scope + .merge_bind_group(&*texture_guard, &bind_group.used) 
+ .map_pass_err(scope)?; + } + //Note: stateless trackers are not merged: the lifetime reference + // is held to the bind group itself. + + cmd_buf.buffer_memory_init_actions.extend( + bind_group.used_buffer_ranges.iter().filter_map(|action| { + match buffer_guard.get(action.id) { + Ok(buffer) => buffer.initialization_status.check_action(action), + Err(_) => None, + } + }), + ); + for action in bind_group.used_texture_ranges.iter() { + info.pending_discard_init_fixups.extend( + cmd_buf + .texture_memory_actions + .register_init_action(action, &texture_guard), + ); + } + + let pipeline_layout_id = state.binder.pipeline_layout_id; + let entries = state.binder.assign_group( + index as usize, + id::Valid(bind_group_id), + bind_group, + &temp_offsets, + ); + if !entries.is_empty() { + let pipeline_layout = + &pipeline_layout_guard[pipeline_layout_id.unwrap()].raw; + for (i, e) in entries.iter().enumerate() { + let raw_bg = + &bind_group_guard[e.group_id.as_ref().unwrap().value].raw; + + unsafe { + raw.set_bind_group( + pipeline_layout, + index as u32 + i as u32, + raw_bg, + &e.dynamic_offsets, + ); + } + } + } + } + RenderCommand::SetPipeline(pipeline_id) => { + let scope = PassErrorScope::SetPipelineRender(pipeline_id); + state.pipeline = Some(pipeline_id); + + let pipeline: &pipeline::RenderPipeline<A> = cmd_buf + .trackers + .render_pipelines + .add_single(&*render_pipeline_guard, pipeline_id) + .ok_or(RenderCommandError::InvalidPipeline(pipeline_id)) + .map_pass_err(scope)?; + + info.context + .check_compatible(&pipeline.pass_context) + .map_err(RenderCommandError::IncompatiblePipelineTargets) + .map_pass_err(scope)?; + + state.pipeline_flags = pipeline.flags; + + if (pipeline.flags.contains(PipelineFlags::WRITES_DEPTH) + && info.is_depth_read_only) + || (pipeline.flags.contains(PipelineFlags::WRITES_STENCIL) + && info.is_stencil_read_only) + { + return Err(RenderCommandError::IncompatiblePipelineRods) + .map_pass_err(scope); + } + + state + .blend_constant + 
.require(pipeline.flags.contains(PipelineFlags::BLEND_CONSTANT)); + + unsafe { + raw.set_render_pipeline(&pipeline.raw); + } + + if pipeline.flags.contains(PipelineFlags::STENCIL_REFERENCE) { + unsafe { + raw.set_stencil_reference(state.stencil_reference); + } + } + + // Rebind resource + if state.binder.pipeline_layout_id != Some(pipeline.layout_id.value) { + let pipeline_layout = &pipeline_layout_guard[pipeline.layout_id.value]; + + let (start_index, entries) = state.binder.change_pipeline_layout( + &*pipeline_layout_guard, + pipeline.layout_id.value, + &pipeline.late_sized_buffer_groups, + ); + if !entries.is_empty() { + for (i, e) in entries.iter().enumerate() { + let raw_bg = + &bind_group_guard[e.group_id.as_ref().unwrap().value].raw; + + unsafe { + raw.set_bind_group( + &pipeline_layout.raw, + start_index as u32 + i as u32, + raw_bg, + &e.dynamic_offsets, + ); + } + } + } + + // Clear push constant ranges + let non_overlapping = super::bind::compute_nonoverlapping_ranges( + &pipeline_layout.push_constant_ranges, + ); + for range in non_overlapping { + let offset = range.range.start; + let size_bytes = range.range.end - offset; + super::push_constant_clear( + offset, + size_bytes, + |clear_offset, clear_data| unsafe { + raw.set_push_constants( + &pipeline_layout.raw, + range.stages, + clear_offset, + clear_data, + ); + }, + ); + } + } + + state.index.pipeline_format = pipeline.strip_index_format; + + let vertex_steps_len = pipeline.vertex_steps.len(); + state.vertex.buffers_required = vertex_steps_len as u32; + + // Initialize each `vertex.inputs[i].step` from + // `pipeline.vertex_steps[i]`. Enlarge `vertex.inputs` + // as necessary to accomodate all slots in the + // pipeline. If `vertex.inputs` is longer, fill the + // extra entries with default `VertexStep`s. + while state.vertex.inputs.len() < vertex_steps_len { + state.vertex.inputs.push(VertexBufferState::EMPTY); + } + + // This is worse as a `zip`, but it's close. 
+ let mut steps = pipeline.vertex_steps.iter(); + for input in state.vertex.inputs.iter_mut() { + input.step = steps.next().cloned().unwrap_or_default(); + } + + // Update vertex buffer limits. + state.vertex.update_limits(); + } + RenderCommand::SetIndexBuffer { + buffer_id, + index_format, + offset, + size, + } => { + let scope = PassErrorScope::SetIndexBuffer(buffer_id); + let buffer: &Buffer<A> = info + .usage_scope + .buffers + .merge_single(&*buffer_guard, buffer_id, hal::BufferUses::INDEX) + .map_pass_err(scope)?; + check_buffer_usage(buffer.usage, BufferUsages::INDEX) + .map_pass_err(scope)?; + let buf_raw = buffer + .raw + .as_ref() + .ok_or(RenderCommandError::DestroyedBuffer(buffer_id)) + .map_pass_err(scope)?; + + let end = match size { + Some(s) => offset + s.get(), + None => buffer.size, + }; + state.index.bound_buffer_view = Some((id::Valid(buffer_id), offset..end)); + + state.index.format = Some(index_format); + state.index.update_limit(); + + cmd_buf.buffer_memory_init_actions.extend( + buffer.initialization_status.create_action( + buffer_id, + offset..end, + MemoryInitKind::NeedsInitializedMemory, + ), + ); + + let bb = hal::BufferBinding { + buffer: buf_raw, + offset, + size, + }; + unsafe { + raw.set_index_buffer(bb, index_format); + } + } + RenderCommand::SetVertexBuffer { + slot, + buffer_id, + offset, + size, + } => { + let scope = PassErrorScope::SetVertexBuffer(buffer_id); + let buffer: &Buffer<A> = info + .usage_scope + .buffers + .merge_single(&*buffer_guard, buffer_id, hal::BufferUses::VERTEX) + .map_pass_err(scope)?; + check_buffer_usage(buffer.usage, BufferUsages::VERTEX) + .map_pass_err(scope)?; + let buf_raw = buffer + .raw + .as_ref() + .ok_or(RenderCommandError::DestroyedBuffer(buffer_id)) + .map_pass_err(scope)?; + + let empty_slots = + (1 + slot as usize).saturating_sub(state.vertex.inputs.len()); + state + .vertex + .inputs + .extend(iter::repeat(VertexBufferState::EMPTY).take(empty_slots)); + let vertex_state = &mut 
state.vertex.inputs[slot as usize]; + //TODO: where are we checking that the offset is in bound? + vertex_state.total_size = match size { + Some(s) => s.get(), + None => buffer.size - offset, + }; + vertex_state.bound = true; + + cmd_buf.buffer_memory_init_actions.extend( + buffer.initialization_status.create_action( + buffer_id, + offset..(offset + vertex_state.total_size), + MemoryInitKind::NeedsInitializedMemory, + ), + ); + + let bb = hal::BufferBinding { + buffer: buf_raw, + offset, + size, + }; + unsafe { + raw.set_vertex_buffer(slot, bb); + } + state.vertex.update_limits(); + } + RenderCommand::SetBlendConstant(ref color) => { + state.blend_constant = OptionalState::Set; + let array = [ + color.r as f32, + color.g as f32, + color.b as f32, + color.a as f32, + ]; + unsafe { + raw.set_blend_constants(&array); + } + } + RenderCommand::SetStencilReference(value) => { + state.stencil_reference = value; + if state + .pipeline_flags + .contains(PipelineFlags::STENCIL_REFERENCE) + { + unsafe { + raw.set_stencil_reference(value); + } + } + } + RenderCommand::SetViewport { + ref rect, + depth_min, + depth_max, + } => { + let scope = PassErrorScope::SetViewport; + if rect.w <= 0.0 || rect.h <= 0.0 { + return Err(RenderCommandError::InvalidViewportDimension( + rect.w, rect.h, + )) + .map_pass_err(scope); + } + if !(0.0..=1.0).contains(&depth_min) || !(0.0..=1.0).contains(&depth_max) { + return Err(RenderCommandError::InvalidViewportDepth( + depth_min, depth_max, + )) + .map_pass_err(scope); + } + let r = hal::Rect { + x: rect.x, + y: rect.y, + w: rect.w, + h: rect.h, + }; + unsafe { + raw.set_viewport(&r, depth_min..depth_max); + } + } + RenderCommand::SetPushConstant { + stages, + offset, + size_bytes, + values_offset, + } => { + let scope = PassErrorScope::SetPushConstant; + let values_offset = values_offset + .ok_or(RenderPassErrorInner::InvalidValuesOffset) + .map_pass_err(scope)?; + + let end_offset_bytes = offset + size_bytes; + let values_end_offset = + 
(values_offset + size_bytes / wgt::PUSH_CONSTANT_ALIGNMENT) as usize; + let data_slice = + &base.push_constant_data[(values_offset as usize)..values_end_offset]; + + let pipeline_layout_id = state + .binder + .pipeline_layout_id + .ok_or(DrawError::MissingPipeline) + .map_pass_err(scope)?; + let pipeline_layout = &pipeline_layout_guard[pipeline_layout_id]; + + pipeline_layout + .validate_push_constant_ranges(stages, offset, end_offset_bytes) + .map_err(RenderCommandError::from) + .map_pass_err(scope)?; + + unsafe { + raw.set_push_constants(&pipeline_layout.raw, stages, offset, data_slice) + } + } + RenderCommand::SetScissor(ref rect) => { + let scope = PassErrorScope::SetScissorRect; + if rect.w == 0 + || rect.h == 0 + || rect.x + rect.w > info.extent.width + || rect.y + rect.h > info.extent.height + { + return Err(RenderCommandError::InvalidScissorRect(*rect, info.extent)) + .map_pass_err(scope); + } + let r = hal::Rect { + x: rect.x, + y: rect.y, + w: rect.w, + h: rect.h, + }; + unsafe { + raw.set_scissor_rect(&r); + } + } + RenderCommand::Draw { + vertex_count, + instance_count, + first_vertex, + first_instance, + } => { + let indexed = false; + let scope = PassErrorScope::Draw { + indexed, + indirect: false, + pipeline: state.pipeline, + }; + state.is_ready(indexed).map_pass_err(scope)?; + + let last_vertex = first_vertex + vertex_count; + let vertex_limit = state.vertex.vertex_limit; + if last_vertex > vertex_limit { + return Err(DrawError::VertexBeyondLimit { + last_vertex, + vertex_limit, + slot: state.vertex.vertex_limit_slot, + }) + .map_pass_err(scope); + } + let last_instance = first_instance + instance_count; + let instance_limit = state.vertex.instance_limit; + if last_instance > instance_limit { + return Err(DrawError::InstanceBeyondLimit { + last_instance, + instance_limit, + slot: state.vertex.instance_limit_slot, + }) + .map_pass_err(scope); + } + + unsafe { + raw.draw(first_vertex, vertex_count, first_instance, instance_count); + } + } + 
RenderCommand::DrawIndexed { + index_count, + instance_count, + first_index, + base_vertex, + first_instance, + } => { + let indexed = true; + let scope = PassErrorScope::Draw { + indexed, + indirect: false, + pipeline: state.pipeline, + }; + state.is_ready(indexed).map_pass_err(scope)?; + + //TODO: validate that base_vertex + max_index() is + // within the provided range + let last_index = first_index + index_count; + let index_limit = state.index.limit; + if last_index > index_limit { + return Err(DrawError::IndexBeyondLimit { + last_index, + index_limit, + }) + .map_pass_err(scope); + } + let last_instance = first_instance + instance_count; + let instance_limit = state.vertex.instance_limit; + if last_instance > instance_limit { + return Err(DrawError::InstanceBeyondLimit { + last_instance, + instance_limit, + slot: state.vertex.instance_limit_slot, + }) + .map_pass_err(scope); + } + + unsafe { + raw.draw_indexed( + first_index, + index_count, + base_vertex, + first_instance, + instance_count, + ); + } + } + RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count, + indexed, + } => { + let scope = PassErrorScope::Draw { + indexed, + indirect: true, + pipeline: state.pipeline, + }; + state.is_ready(indexed).map_pass_err(scope)?; + + let stride = match indexed { + false => mem::size_of::<wgt::DrawIndirectArgs>(), + true => mem::size_of::<wgt::DrawIndexedIndirectArgs>(), + }; + + if count.is_some() { + device + .require_features(wgt::Features::MULTI_DRAW_INDIRECT) + .map_pass_err(scope)?; + } + device + .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION) + .map_pass_err(scope)?; + + let indirect_buffer: &Buffer<A> = info + .usage_scope + .buffers + .merge_single(&*buffer_guard, buffer_id, hal::BufferUses::INDIRECT) + .map_pass_err(scope)?; + check_buffer_usage(indirect_buffer.usage, BufferUsages::INDIRECT) + .map_pass_err(scope)?; + let indirect_raw = indirect_buffer + .raw + .as_ref() + .ok_or(RenderCommandError::DestroyedBuffer(buffer_id)) 
+ .map_pass_err(scope)?; + + let actual_count = count.map_or(1, |c| c.get()); + + let end_offset = offset + stride as u64 * actual_count as u64; + if end_offset > indirect_buffer.size { + return Err(RenderPassErrorInner::IndirectBufferOverrun { + count, + offset, + end_offset, + buffer_size: indirect_buffer.size, + }) + .map_pass_err(scope); + } + + cmd_buf.buffer_memory_init_actions.extend( + indirect_buffer.initialization_status.create_action( + buffer_id, + offset..end_offset, + MemoryInitKind::NeedsInitializedMemory, + ), + ); + + match indexed { + false => unsafe { + raw.draw_indirect(indirect_raw, offset, actual_count); + }, + true => unsafe { + raw.draw_indexed_indirect(indirect_raw, offset, actual_count); + }, + } + } + RenderCommand::MultiDrawIndirectCount { + buffer_id, + offset, + count_buffer_id, + count_buffer_offset, + max_count, + indexed, + } => { + let scope = PassErrorScope::Draw { + indexed, + indirect: true, + pipeline: state.pipeline, + }; + state.is_ready(indexed).map_pass_err(scope)?; + + let stride = match indexed { + false => mem::size_of::<wgt::DrawIndirectArgs>(), + true => mem::size_of::<wgt::DrawIndexedIndirectArgs>(), + } as u64; + + device + .require_features(wgt::Features::MULTI_DRAW_INDIRECT_COUNT) + .map_pass_err(scope)?; + device + .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION) + .map_pass_err(scope)?; + + let indirect_buffer: &Buffer<A> = info + .usage_scope + .buffers + .merge_single(&*buffer_guard, buffer_id, hal::BufferUses::INDIRECT) + .map_pass_err(scope)?; + check_buffer_usage(indirect_buffer.usage, BufferUsages::INDIRECT) + .map_pass_err(scope)?; + let indirect_raw = indirect_buffer + .raw + .as_ref() + .ok_or(RenderCommandError::DestroyedBuffer(buffer_id)) + .map_pass_err(scope)?; + + let count_buffer: &Buffer<A> = info + .usage_scope + .buffers + .merge_single( + &*buffer_guard, + count_buffer_id, + hal::BufferUses::INDIRECT, + ) + .map_pass_err(scope)?; + check_buffer_usage(count_buffer.usage, 
BufferUsages::INDIRECT) + .map_pass_err(scope)?; + let count_raw = count_buffer + .raw + .as_ref() + .ok_or(RenderCommandError::DestroyedBuffer(count_buffer_id)) + .map_pass_err(scope)?; + + let end_offset = offset + stride * max_count as u64; + if end_offset > indirect_buffer.size { + return Err(RenderPassErrorInner::IndirectBufferOverrun { + count: None, + offset, + end_offset, + buffer_size: indirect_buffer.size, + }) + .map_pass_err(scope); + } + cmd_buf.buffer_memory_init_actions.extend( + indirect_buffer.initialization_status.create_action( + buffer_id, + offset..end_offset, + MemoryInitKind::NeedsInitializedMemory, + ), + ); + + let begin_count_offset = count_buffer_offset; + let end_count_offset = count_buffer_offset + 4; + if end_count_offset > count_buffer.size { + return Err(RenderPassErrorInner::IndirectCountBufferOverrun { + begin_count_offset, + end_count_offset, + count_buffer_size: count_buffer.size, + }) + .map_pass_err(scope); + } + cmd_buf.buffer_memory_init_actions.extend( + count_buffer.initialization_status.create_action( + count_buffer_id, + count_buffer_offset..end_count_offset, + MemoryInitKind::NeedsInitializedMemory, + ), + ); + + match indexed { + false => unsafe { + raw.draw_indirect_count( + indirect_raw, + offset, + count_raw, + count_buffer_offset, + max_count, + ); + }, + true => unsafe { + raw.draw_indexed_indirect_count( + indirect_raw, + offset, + count_raw, + count_buffer_offset, + max_count, + ); + }, + } + } + RenderCommand::PushDebugGroup { color: _, len } => { + state.debug_scope_depth += 1; + let label = + str::from_utf8(&base.string_data[string_offset..string_offset + len]) + .unwrap(); + string_offset += len; + unsafe { + raw.begin_debug_marker(label); + } + } + RenderCommand::PopDebugGroup => { + let scope = PassErrorScope::PopDebugGroup; + if state.debug_scope_depth == 0 { + return Err(RenderPassErrorInner::InvalidPopDebugGroup) + .map_pass_err(scope); + } + state.debug_scope_depth -= 1; + unsafe { + 
raw.end_debug_marker(); + } + } + RenderCommand::InsertDebugMarker { color: _, len } => { + let label = + str::from_utf8(&base.string_data[string_offset..string_offset + len]) + .unwrap(); + string_offset += len; + unsafe { + raw.insert_debug_marker(label); + } + } + RenderCommand::WriteTimestamp { + query_set_id, + query_index, + } => { + let scope = PassErrorScope::WriteTimestamp; + + device + .require_features(wgt::Features::WRITE_TIMESTAMP_INSIDE_PASSES) + .map_pass_err(scope)?; + + let query_set: &resource::QuerySet<A> = cmd_buf + .trackers + .query_sets + .add_single(&*query_set_guard, query_set_id) + .ok_or(RenderCommandError::InvalidQuerySet(query_set_id)) + .map_pass_err(scope)?; + + query_set + .validate_and_write_timestamp( + raw, + query_set_id, + query_index, + Some(&mut query_reset_state), + ) + .map_pass_err(scope)?; + } + RenderCommand::BeginPipelineStatisticsQuery { + query_set_id, + query_index, + } => { + let scope = PassErrorScope::BeginPipelineStatisticsQuery; + + let query_set: &resource::QuerySet<A> = cmd_buf + .trackers + .query_sets + .add_single(&*query_set_guard, query_set_id) + .ok_or(RenderCommandError::InvalidQuerySet(query_set_id)) + .map_pass_err(scope)?; + + query_set + .validate_and_begin_pipeline_statistics_query( + raw, + query_set_id, + query_index, + Some(&mut query_reset_state), + &mut active_query, + ) + .map_pass_err(scope)?; + } + RenderCommand::EndPipelineStatisticsQuery => { + let scope = PassErrorScope::EndPipelineStatisticsQuery; + + end_pipeline_statistics_query(raw, &*query_set_guard, &mut active_query) + .map_pass_err(scope)?; + } + RenderCommand::ExecuteBundle(bundle_id) => { + let scope = PassErrorScope::ExecuteBundle; + let bundle: &command::RenderBundle<A> = cmd_buf + .trackers + .bundles + .add_single(&*bundle_guard, bundle_id) + .ok_or(RenderCommandError::InvalidRenderBundle(bundle_id)) + .map_pass_err(scope)?; + + info.context + .check_compatible(&bundle.context) + 
.map_err(RenderPassErrorInner::IncompatibleBundleTargets) + .map_pass_err(scope)?; + + if (info.is_depth_read_only && !bundle.is_depth_read_only) + || (info.is_stencil_read_only && !bundle.is_stencil_read_only) + { + return Err(RenderPassErrorInner::IncompatibleBundleRods { + pass_depth: info.is_depth_read_only, + pass_stencil: info.is_stencil_read_only, + bundle_depth: bundle.is_depth_read_only, + bundle_stencil: bundle.is_stencil_read_only, + }) + .map_pass_err(scope); + } + + cmd_buf.buffer_memory_init_actions.extend( + bundle + .buffer_memory_init_actions + .iter() + .filter_map(|action| match buffer_guard.get(action.id) { + Ok(buffer) => buffer.initialization_status.check_action(action), + Err(_) => None, + }), + ); + for action in bundle.texture_memory_init_actions.iter() { + info.pending_discard_init_fixups.extend( + cmd_buf + .texture_memory_actions + .register_init_action(action, &texture_guard), + ); + } + + unsafe { + bundle.execute( + raw, + &*pipeline_layout_guard, + &*bind_group_guard, + &*render_pipeline_guard, + &*buffer_guard, + ) + } + .map_err(|e| match e { + ExecutionError::DestroyedBuffer(id) => { + RenderCommandError::DestroyedBuffer(id) + } + ExecutionError::Unimplemented(what) => { + RenderCommandError::Unimplemented(what) + } + }) + .map_pass_err(scope)?; + + unsafe { + info.usage_scope + .merge_render_bundle(&*texture_guard, &bundle.used) + .map_pass_err(scope)?; + cmd_buf + .trackers + .add_from_render_bundle(&bundle.used) + .map_pass_err(scope)?; + }; + state.reset_bundle(); + } + } + } + + log::trace!("Merging renderpass into cmd_buf {:?}", encoder_id); + let (trackers, pending_discard_init_fixups) = + info.finish(raw, &*texture_guard).map_pass_err(init_scope)?; + + cmd_buf.encoder.close(); + (trackers, query_reset_state, pending_discard_init_fixups) + }; + + let (mut cmb_guard, mut token) = hub.command_buffers.write(&mut token); + let (query_set_guard, mut token) = hub.query_sets.read(&mut token); + let (buffer_guard, mut token) = 
hub.buffers.read(&mut token); + let (texture_guard, _) = hub.textures.read(&mut token); + + let cmd_buf = cmb_guard.get_mut(encoder_id).unwrap(); + { + let transit = cmd_buf.encoder.open(); + + fixup_discarded_surfaces( + pending_discard_init_fixups.into_iter(), + transit, + &texture_guard, + &mut cmd_buf.trackers.textures, + &device_guard[cmd_buf.device_id.value], + ); + + query_reset_state + .reset_queries( + transit, + &query_set_guard, + cmd_buf.device_id.value.0.backend(), + ) + .map_err(RenderCommandError::InvalidQuerySet) + .map_pass_err(PassErrorScope::QueryReset)?; + + super::CommandBuffer::insert_barriers_from_scope( + transit, + &mut cmd_buf.trackers, + &scope, + &*buffer_guard, + &*texture_guard, + ); + } + + // Before we finish the auxiliary encoder, let's + // get our pass back and place it after. + //Note: we could just hold onto this raw pass while recording the + // auxiliary encoder, but then handling errors and cleaning up + // would be more complicated, so we re-use `open()`/`close()`. + let pass_raw = cmd_buf.encoder.list.pop().unwrap(); + cmd_buf.encoder.close(); + cmd_buf.encoder.list.push(pass_raw); + cmd_buf.status = CommandEncoderStatus::Recording; + + Ok(()) + } +} + +pub mod render_ffi { + use super::{ + super::{Rect, RenderCommand}, + RenderPass, + }; + use crate::{id, RawString}; + use std::{convert::TryInto, ffi, num::NonZeroU32, slice}; + use wgt::{BufferAddress, BufferSize, Color, DynamicOffset, IndexFormat}; + + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given pointer is + /// valid for `offset_length` elements. 
+ #[no_mangle] + pub unsafe extern "C" fn wgpu_render_pass_set_bind_group( + pass: &mut RenderPass, + index: u32, + bind_group_id: id::BindGroupId, + offsets: *const DynamicOffset, + offset_length: usize, + ) { + let redundant = unsafe { + pass.current_bind_groups.set_and_check_redundant( + bind_group_id, + index, + &mut pass.base.dynamic_offsets, + offsets, + offset_length, + ) + }; + + if redundant { + return; + } + + pass.base.commands.push(RenderCommand::SetBindGroup { + index: index.try_into().unwrap(), + num_dynamic_offsets: offset_length.try_into().unwrap(), + bind_group_id, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_set_pipeline( + pass: &mut RenderPass, + pipeline_id: id::RenderPipelineId, + ) { + if pass.current_pipeline.set_and_check_redundant(pipeline_id) { + return; + } + + pass.base + .commands + .push(RenderCommand::SetPipeline(pipeline_id)); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_set_vertex_buffer( + pass: &mut RenderPass, + slot: u32, + buffer_id: id::BufferId, + offset: BufferAddress, + size: Option<BufferSize>, + ) { + pass.base.commands.push(RenderCommand::SetVertexBuffer { + slot, + buffer_id, + offset, + size, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_set_index_buffer( + pass: &mut RenderPass, + buffer: id::BufferId, + index_format: IndexFormat, + offset: BufferAddress, + size: Option<BufferSize>, + ) { + pass.set_index_buffer(buffer, index_format, offset, size); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_set_blend_constant(pass: &mut RenderPass, color: &Color) { + pass.base + .commands + .push(RenderCommand::SetBlendConstant(*color)); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_set_stencil_reference(pass: &mut RenderPass, value: u32) { + pass.base + .commands + .push(RenderCommand::SetStencilReference(value)); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_set_viewport( + pass: &mut RenderPass, + x: f32, + y: f32, + w: f32, + h: f32, + 
depth_min: f32, + depth_max: f32, + ) { + pass.base.commands.push(RenderCommand::SetViewport { + rect: Rect { x, y, w, h }, + depth_min, + depth_max, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_set_scissor_rect( + pass: &mut RenderPass, + x: u32, + y: u32, + w: u32, + h: u32, + ) { + pass.base + .commands + .push(RenderCommand::SetScissor(Rect { x, y, w, h })); + } + + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given pointer is + /// valid for `size_bytes` bytes. + #[no_mangle] + pub unsafe extern "C" fn wgpu_render_pass_set_push_constants( + pass: &mut RenderPass, + stages: wgt::ShaderStages, + offset: u32, + size_bytes: u32, + data: *const u8, + ) { + assert_eq!( + offset & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), + 0, + "Push constant offset must be aligned to 4 bytes." + ); + assert_eq!( + size_bytes & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), + 0, + "Push constant size must be aligned to 4 bytes." + ); + let data_slice = unsafe { slice::from_raw_parts(data, size_bytes as usize) }; + let value_offset = pass.base.push_constant_data.len().try_into().expect( + "Ran out of push constant space. 
Don't set 4gb of push constants per RenderPass.", + ); + + pass.base.push_constant_data.extend( + data_slice + .chunks_exact(wgt::PUSH_CONSTANT_ALIGNMENT as usize) + .map(|arr| u32::from_ne_bytes([arr[0], arr[1], arr[2], arr[3]])), + ); + + pass.base.commands.push(RenderCommand::SetPushConstant { + stages, + offset, + size_bytes, + values_offset: Some(value_offset), + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_draw( + pass: &mut RenderPass, + vertex_count: u32, + instance_count: u32, + first_vertex: u32, + first_instance: u32, + ) { + pass.base.commands.push(RenderCommand::Draw { + vertex_count, + instance_count, + first_vertex, + first_instance, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_draw_indexed( + pass: &mut RenderPass, + index_count: u32, + instance_count: u32, + first_index: u32, + base_vertex: i32, + first_instance: u32, + ) { + pass.base.commands.push(RenderCommand::DrawIndexed { + index_count, + instance_count, + first_index, + base_vertex, + first_instance, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_draw_indirect( + pass: &mut RenderPass, + buffer_id: id::BufferId, + offset: BufferAddress, + ) { + pass.base.commands.push(RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count: None, + indexed: false, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_draw_indexed_indirect( + pass: &mut RenderPass, + buffer_id: id::BufferId, + offset: BufferAddress, + ) { + pass.base.commands.push(RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count: None, + indexed: true, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_multi_draw_indirect( + pass: &mut RenderPass, + buffer_id: id::BufferId, + offset: BufferAddress, + count: u32, + ) { + pass.base.commands.push(RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count: NonZeroU32::new(count), + indexed: false, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_multi_draw_indexed_indirect( 
+ pass: &mut RenderPass, + buffer_id: id::BufferId, + offset: BufferAddress, + count: u32, + ) { + pass.base.commands.push(RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count: NonZeroU32::new(count), + indexed: true, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_multi_draw_indirect_count( + pass: &mut RenderPass, + buffer_id: id::BufferId, + offset: BufferAddress, + count_buffer_id: id::BufferId, + count_buffer_offset: BufferAddress, + max_count: u32, + ) { + pass.base + .commands + .push(RenderCommand::MultiDrawIndirectCount { + buffer_id, + offset, + count_buffer_id, + count_buffer_offset, + max_count, + indexed: false, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_multi_draw_indexed_indirect_count( + pass: &mut RenderPass, + buffer_id: id::BufferId, + offset: BufferAddress, + count_buffer_id: id::BufferId, + count_buffer_offset: BufferAddress, + max_count: u32, + ) { + pass.base + .commands + .push(RenderCommand::MultiDrawIndirectCount { + buffer_id, + offset, + count_buffer_id, + count_buffer_offset, + max_count, + indexed: true, + }); + } + + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given `label` + /// is a valid null-terminated string. + #[no_mangle] + pub unsafe extern "C" fn wgpu_render_pass_push_debug_group( + pass: &mut RenderPass, + label: RawString, + color: u32, + ) { + let bytes = unsafe { ffi::CStr::from_ptr(label) }.to_bytes(); + pass.base.string_data.extend_from_slice(bytes); + + pass.base.commands.push(RenderCommand::PushDebugGroup { + color, + len: bytes.len(), + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_pop_debug_group(pass: &mut RenderPass) { + pass.base.commands.push(RenderCommand::PopDebugGroup); + } + + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given `label` + /// is a valid null-terminated string. 
+ #[no_mangle] + pub unsafe extern "C" fn wgpu_render_pass_insert_debug_marker( + pass: &mut RenderPass, + label: RawString, + color: u32, + ) { + let bytes = unsafe { ffi::CStr::from_ptr(label) }.to_bytes(); + pass.base.string_data.extend_from_slice(bytes); + + pass.base.commands.push(RenderCommand::InsertDebugMarker { + color, + len: bytes.len(), + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_write_timestamp( + pass: &mut RenderPass, + query_set_id: id::QuerySetId, + query_index: u32, + ) { + pass.base.commands.push(RenderCommand::WriteTimestamp { + query_set_id, + query_index, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_begin_pipeline_statistics_query( + pass: &mut RenderPass, + query_set_id: id::QuerySetId, + query_index: u32, + ) { + pass.base + .commands + .push(RenderCommand::BeginPipelineStatisticsQuery { + query_set_id, + query_index, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_end_pipeline_statistics_query(pass: &mut RenderPass) { + pass.base + .commands + .push(RenderCommand::EndPipelineStatisticsQuery); + } + + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given pointer is + /// valid for `render_bundle_ids_length` elements. 
+ #[no_mangle] + pub unsafe extern "C" fn wgpu_render_pass_execute_bundles( + pass: &mut RenderPass, + render_bundle_ids: *const id::RenderBundleId, + render_bundle_ids_length: usize, + ) { + for &bundle_id in + unsafe { slice::from_raw_parts(render_bundle_ids, render_bundle_ids_length) } + { + pass.base + .commands + .push(RenderCommand::ExecuteBundle(bundle_id)); + } + pass.current_pipeline.reset(); + pass.current_bind_groups.reset(); + } +} diff --git a/third_party/rust/wgpu-core/src/command/transfer.rs b/third_party/rust/wgpu-core/src/command/transfer.rs new file mode 100644 index 0000000000..70c01a9c46 --- /dev/null +++ b/third_party/rust/wgpu-core/src/command/transfer.rs @@ -0,0 +1,1132 @@ +#[cfg(feature = "trace")] +use crate::device::trace::Command as TraceCommand; +use crate::{ + command::{clear_texture, CommandBuffer, CommandEncoderError}, + conv, + device::{Device, MissingDownlevelFlags}, + error::{ErrorFormatter, PrettyError}, + hub::{Global, GlobalIdentityHandlerFactory, HalApi, Storage, Token}, + id::{BufferId, CommandEncoderId, TextureId, Valid}, + init_tracker::{ + has_copy_partial_init_tracker_coverage, MemoryInitKind, TextureInitRange, + TextureInitTrackerAction, + }, + resource::{Texture, TextureErrorDimension}, + track::TextureSelector, +}; + +use arrayvec::ArrayVec; +use hal::CommandEncoder as _; +use thiserror::Error; +use wgt::{BufferAddress, BufferUsages, Extent3d, TextureUsages}; + +use std::iter; + +pub type ImageCopyBuffer = wgt::ImageCopyBuffer<BufferId>; +pub type ImageCopyTexture = wgt::ImageCopyTexture<TextureId>; + +#[derive(Clone, Copy, Debug)] +pub enum CopySide { + Source, + Destination, +} + +/// Error encountered while attempting a data transfer. 
+#[derive(Clone, Debug, Error)] +pub enum TransferError { + #[error("buffer {0:?} is invalid or destroyed")] + InvalidBuffer(BufferId), + #[error("texture {0:?} is invalid or destroyed")] + InvalidTexture(TextureId), + #[error("Source and destination cannot be the same buffer")] + SameSourceDestinationBuffer, + #[error("source buffer/texture is missing the `COPY_SRC` usage flag")] + MissingCopySrcUsageFlag, + #[error("destination buffer/texture is missing the `COPY_DST` usage flag")] + MissingCopyDstUsageFlag(Option<BufferId>, Option<TextureId>), + #[error("copy of {start_offset}..{end_offset} would end up overrunning the bounds of the {side:?} buffer of size {buffer_size}")] + BufferOverrun { + start_offset: BufferAddress, + end_offset: BufferAddress, + buffer_size: BufferAddress, + side: CopySide, + }, + #[error("copy of {dimension:?} {start_offset}..{end_offset} would end up overrunning the bounds of the {side:?} texture of {dimension:?} size {texture_size}")] + TextureOverrun { + start_offset: u32, + end_offset: u32, + texture_size: u32, + dimension: TextureErrorDimension, + side: CopySide, + }, + #[error("unable to select texture aspect {aspect:?} from fromat {format:?}")] + InvalidTextureAspect { + format: wgt::TextureFormat, + aspect: wgt::TextureAspect, + }, + #[error("unable to select texture mip level {level} out of {total}")] + InvalidTextureMipLevel { level: u32, total: u32 }, + #[error("buffer offset {0} is not aligned to block size or `COPY_BUFFER_ALIGNMENT`")] + UnalignedBufferOffset(BufferAddress), + #[error("copy size {0} does not respect `COPY_BUFFER_ALIGNMENT`")] + UnalignedCopySize(BufferAddress), + #[error("copy width is not a multiple of block width")] + UnalignedCopyWidth, + #[error("copy height is not a multiple of block height")] + UnalignedCopyHeight, + #[error("copy origin's x component is not a multiple of block width")] + UnalignedCopyOriginX, + #[error("copy origin's y component is not a multiple of block height")] + 
UnalignedCopyOriginY, + #[error("bytes per row does not respect `COPY_BYTES_PER_ROW_ALIGNMENT`")] + UnalignedBytesPerRow, + #[error("number of bytes per row needs to be specified since more than one row is copied")] + UnspecifiedBytesPerRow, + #[error("number of rows per image needs to be specified since more than one image is copied")] + UnspecifiedRowsPerImage, + #[error("number of bytes per row is less than the number of bytes in a complete row")] + InvalidBytesPerRow, + #[error("image is 1D and the copy height and depth are not both set to 1")] + InvalidCopySize, + #[error("number of rows per image is invalid")] + InvalidRowsPerImage, + #[error("source and destination layers have different aspects")] + MismatchedAspects, + #[error("copying from textures with format {format:?} and aspect {aspect:?} is forbidden")] + CopyFromForbiddenTextureFormat { + format: wgt::TextureFormat, + aspect: wgt::TextureAspect, + }, + #[error("copying to textures with format {format:?} and aspect {aspect:?} is forbidden")] + CopyToForbiddenTextureFormat { + format: wgt::TextureFormat, + aspect: wgt::TextureAspect, + }, + #[error("the entire texture must be copied when copying from depth texture")] + InvalidDepthTextureExtent, + #[error( + "source format ({src_format:?}) and destination format ({dst_format:?}) are different" + )] + MismatchedTextureFormats { + src_format: wgt::TextureFormat, + dst_format: wgt::TextureFormat, + }, + #[error(transparent)] + MemoryInitFailure(#[from] super::ClearError), + #[error("Cannot encode this copy because of a missing downelevel flag")] + MissingDownlevelFlags(#[from] MissingDownlevelFlags), + #[error("Source texture sample count must be 1, got {sample_count}")] + InvalidSampleCount { sample_count: u32 }, + #[error("Requested mip level {requested} does no exist (count: {count})")] + InvalidMipLevel { requested: u32, count: u32 }, +} + +impl PrettyError for TransferError { + fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { + fmt.error(self); + 
match *self { + Self::InvalidBuffer(id) => { + fmt.buffer_label(&id); + } + Self::InvalidTexture(id) => { + fmt.texture_label(&id); + } + // Self::MissingCopySrcUsageFlag(buf_opt, tex_opt) => { + // if let Some(buf) = buf_opt { + // let name = crate::gfx_select!(buf => global.buffer_label(buf)); + // ret.push_str(&format_label_line("source", &name)); + // } + // if let Some(tex) = tex_opt { + // let name = crate::gfx_select!(tex => global.texture_label(tex)); + // ret.push_str(&format_label_line("source", &name)); + // } + // } + Self::MissingCopyDstUsageFlag(buf_opt, tex_opt) => { + if let Some(buf) = buf_opt { + fmt.buffer_label_with_key(&buf, "destination"); + } + if let Some(tex) = tex_opt { + fmt.texture_label_with_key(&tex, "destination"); + } + } + _ => {} + }; + } +} +/// Error encountered while attempting to do a copy on a command encoder. +#[derive(Clone, Debug, Error)] +pub enum CopyError { + #[error(transparent)] + Encoder(#[from] CommandEncoderError), + #[error("Copy error")] + Transfer(#[from] TransferError), +} + +pub(crate) fn extract_texture_selector<A: hal::Api>( + copy_texture: &ImageCopyTexture, + copy_size: &Extent3d, + texture: &Texture<A>, +) -> Result<(TextureSelector, hal::TextureCopyBase, wgt::TextureFormat), TransferError> { + let format = texture.desc.format; + let copy_aspect = + hal::FormatAspects::from(format) & hal::FormatAspects::from(copy_texture.aspect); + if copy_aspect.is_empty() { + return Err(TransferError::InvalidTextureAspect { + format, + aspect: copy_texture.aspect, + }); + } + + let (layers, origin_z) = match texture.desc.dimension { + wgt::TextureDimension::D1 | wgt::TextureDimension::D2 => ( + copy_texture.origin.z..copy_texture.origin.z + copy_size.depth_or_array_layers, + 0, + ), + wgt::TextureDimension::D3 => (0..1, copy_texture.origin.z), + }; + let base = hal::TextureCopyBase { + origin: wgt::Origin3d { + x: copy_texture.origin.x, + y: copy_texture.origin.y, + z: origin_z, + }, + // this value will be incremented 
per copied layer + array_layer: layers.start, + mip_level: copy_texture.mip_level, + aspect: copy_aspect, + }; + let selector = TextureSelector { + mips: copy_texture.mip_level..copy_texture.mip_level + 1, + layers, + }; + + Ok((selector, base, format)) +} + +/// WebGPU's [validating linear texture data][vltd] algorithm. +/// +/// Copied with some modifications from WebGPU standard. +/// +/// If successful, returns a pair `(bytes, stride)`, where: +/// - `bytes` is the number of buffer bytes required for this copy, and +/// - `stride` number of bytes between array layers. +/// +/// [vltd]: https://gpuweb.github.io/gpuweb/#abstract-opdef-validating-linear-texture-data +pub(crate) fn validate_linear_texture_data( + layout: &wgt::ImageDataLayout, + format: wgt::TextureFormat, + buffer_size: BufferAddress, + buffer_side: CopySide, + bytes_per_block: BufferAddress, + copy_size: &Extent3d, + need_copy_aligned_rows: bool, +) -> Result<(BufferAddress, BufferAddress), TransferError> { + // Convert all inputs to BufferAddress (u64) to avoid some of the overflow issues + // Note: u64 is not always enough to prevent overflow, especially when multiplying + // something with a potentially large depth value, so it is preferrable to validate + // the copy size before calling this function (for example via `validate_texture_copy_range`). 
+ let copy_width = copy_size.width as BufferAddress; + let copy_height = copy_size.height as BufferAddress; + let copy_depth = copy_size.depth_or_array_layers as BufferAddress; + + let offset = layout.offset; + + let (block_width, block_height) = format.describe().block_dimensions; + let block_width = block_width as BufferAddress; + let block_height = block_height as BufferAddress; + let block_size = bytes_per_block; + + let width_in_blocks = copy_width / block_width; + let height_in_blocks = copy_height / block_height; + + let bytes_per_row = if let Some(bytes_per_row) = layout.bytes_per_row { + bytes_per_row.get() as BufferAddress + } else { + if copy_depth > 1 || height_in_blocks > 1 { + return Err(TransferError::UnspecifiedBytesPerRow); + } + bytes_per_block * width_in_blocks + }; + let block_rows_per_image = if let Some(rows_per_image) = layout.rows_per_image { + rows_per_image.get() as BufferAddress + } else { + if copy_depth > 1 { + return Err(TransferError::UnspecifiedRowsPerImage); + } + copy_height / block_height + }; + let rows_per_image = block_rows_per_image * block_height; + + if copy_width % block_width != 0 { + return Err(TransferError::UnalignedCopyWidth); + } + if copy_height % block_height != 0 { + return Err(TransferError::UnalignedCopyHeight); + } + + if need_copy_aligned_rows { + let bytes_per_row_alignment = wgt::COPY_BYTES_PER_ROW_ALIGNMENT as BufferAddress; + + if bytes_per_row_alignment % bytes_per_block != 0 { + return Err(TransferError::UnalignedBytesPerRow); + } + if bytes_per_row % bytes_per_row_alignment != 0 { + return Err(TransferError::UnalignedBytesPerRow); + } + } + + let bytes_in_last_row = block_size * width_in_blocks; + let bytes_per_image = bytes_per_row * block_rows_per_image; + let required_bytes_in_copy = if copy_width == 0 || copy_height == 0 || copy_depth == 0 { + 0 + } else { + let bytes_in_last_slice = bytes_per_row * (height_in_blocks - 1) + bytes_in_last_row; + bytes_per_image * (copy_depth - 1) + bytes_in_last_slice 
+ }; + + if rows_per_image < copy_height { + return Err(TransferError::InvalidRowsPerImage); + } + if offset + required_bytes_in_copy > buffer_size { + return Err(TransferError::BufferOverrun { + start_offset: offset, + end_offset: offset + required_bytes_in_copy, + buffer_size, + side: buffer_side, + }); + } + if offset % block_size != 0 { + return Err(TransferError::UnalignedBufferOffset(offset)); + } + if copy_height > 1 && bytes_per_row < bytes_in_last_row { + return Err(TransferError::InvalidBytesPerRow); + } + Ok((required_bytes_in_copy, bytes_per_image)) +} + +/// WebGPU's [validating texture copy range][vtcr] algorithm. +/// +/// Copied with minor modifications from WebGPU standard. +/// +/// Returns the HAL copy extent and the layer count. +/// +/// [vtcr]: https://gpuweb.github.io/gpuweb/#valid-texture-copy-range +pub(crate) fn validate_texture_copy_range( + texture_copy_view: &ImageCopyTexture, + desc: &wgt::TextureDescriptor<()>, + texture_side: CopySide, + copy_size: &Extent3d, +) -> Result<(hal::CopyExtent, u32), TransferError> { + let (block_width, block_height) = desc.format.describe().block_dimensions; + let block_width = block_width as u32; + let block_height = block_height as u32; + + let extent_virtual = desc.mip_level_size(texture_copy_view.mip_level).ok_or( + TransferError::InvalidTextureMipLevel { + level: texture_copy_view.mip_level, + total: desc.mip_level_count, + }, + )?; + // physical size can be larger than the virtual + let extent = extent_virtual.physical_size(desc.format); + + match desc.format { + wgt::TextureFormat::Stencil8 + | wgt::TextureFormat::Depth16Unorm + | wgt::TextureFormat::Depth32Float + | wgt::TextureFormat::Depth32FloatStencil8 + | wgt::TextureFormat::Depth24Plus + | wgt::TextureFormat::Depth24PlusStencil8 => { + if *copy_size != extent { + return Err(TransferError::InvalidDepthTextureExtent); + } + } + _ => {} + } + + /// Return `Ok` if a run `size` texels long starting at `start_offset` falls + /// entirely within 
`texture_size`. Otherwise, return an appropriate a`Err`. + fn check_dimension( + dimension: TextureErrorDimension, + side: CopySide, + start_offset: u32, + size: u32, + texture_size: u32, + ) -> Result<(), TransferError> { + // Avoid underflow in the subtraction by checking start_offset against + // texture_size first. + if start_offset <= texture_size && size <= texture_size - start_offset { + Ok(()) + } else { + Err(TransferError::TextureOverrun { + start_offset, + end_offset: start_offset.wrapping_add(size), + texture_size, + dimension, + side, + }) + } + } + + check_dimension( + TextureErrorDimension::X, + texture_side, + texture_copy_view.origin.x, + copy_size.width, + extent.width, + )?; + check_dimension( + TextureErrorDimension::Y, + texture_side, + texture_copy_view.origin.y, + copy_size.height, + extent.height, + )?; + check_dimension( + TextureErrorDimension::Z, + texture_side, + texture_copy_view.origin.z, + copy_size.depth_or_array_layers, + extent.depth_or_array_layers, + )?; + + if texture_copy_view.origin.x % block_width != 0 { + return Err(TransferError::UnalignedCopyOriginX); + } + if texture_copy_view.origin.y % block_height != 0 { + return Err(TransferError::UnalignedCopyOriginY); + } + if copy_size.width % block_width != 0 { + return Err(TransferError::UnalignedCopyWidth); + } + if copy_size.height % block_height != 0 { + return Err(TransferError::UnalignedCopyHeight); + } + + let (depth, array_layer_count) = match desc.dimension { + wgt::TextureDimension::D1 | wgt::TextureDimension::D2 => { + (1, copy_size.depth_or_array_layers) + } + wgt::TextureDimension::D3 => (copy_size.depth_or_array_layers, 1), + }; + + let copy_extent = hal::CopyExtent { + width: copy_size.width, + height: copy_size.height, + depth, + }; + Ok((copy_extent, array_layer_count)) +} + +fn handle_texture_init<A: HalApi>( + init_kind: MemoryInitKind, + cmd_buf: &mut CommandBuffer<A>, + device: &Device<A>, + copy_texture: &ImageCopyTexture, + copy_size: &Extent3d, + 
texture_guard: &Storage<Texture<A>, TextureId>, +) { + let init_action = TextureInitTrackerAction { + id: copy_texture.texture, + range: TextureInitRange { + mip_range: copy_texture.mip_level..copy_texture.mip_level + 1, + layer_range: copy_texture.origin.z + ..(copy_texture.origin.z + copy_size.depth_or_array_layers), + }, + kind: init_kind, + }; + + // Register the init action. + let immediate_inits = cmd_buf + .texture_memory_actions + .register_init_action(&{ init_action }, texture_guard); + + // In rare cases we may need to insert an init operation immediately onto the command buffer. + if !immediate_inits.is_empty() { + let cmd_buf_raw = cmd_buf.encoder.open(); + for init in immediate_inits { + clear_texture( + texture_guard, + Valid(init.texture), + TextureInitRange { + mip_range: init.mip_level..(init.mip_level + 1), + layer_range: init.layer..(init.layer + 1), + }, + cmd_buf_raw, + &mut cmd_buf.trackers.textures, + &device.alignments, + &device.zero_buffer, + ) + .unwrap(); + } + } +} + +/// Prepare a transfer's source texture. +/// +/// Ensure the source texture of a transfer is in the right initialization +/// state, and record the state for after the transfer operation. +fn handle_src_texture_init<A: HalApi>( + cmd_buf: &mut CommandBuffer<A>, + device: &Device<A>, + source: &ImageCopyTexture, + copy_size: &Extent3d, + texture_guard: &Storage<Texture<A>, TextureId>, +) -> Result<(), TransferError> { + let _ = texture_guard + .get(source.texture) + .map_err(|_| TransferError::InvalidTexture(source.texture))?; + + handle_texture_init( + MemoryInitKind::NeedsInitializedMemory, + cmd_buf, + device, + source, + copy_size, + texture_guard, + ); + Ok(()) +} + +/// Prepare a transfer's destination texture. +/// +/// Ensure the destination texture of a transfer is in the right initialization +/// state, and record the state for after the transfer operation. 
+fn handle_dst_texture_init<A: HalApi>( + cmd_buf: &mut CommandBuffer<A>, + device: &Device<A>, + destination: &ImageCopyTexture, + copy_size: &Extent3d, + texture_guard: &Storage<Texture<A>, TextureId>, +) -> Result<(), TransferError> { + let texture = texture_guard + .get(destination.texture) + .map_err(|_| TransferError::InvalidTexture(destination.texture))?; + + // Attention: If we don't write full texture subresources, we need to a full + // clear first since we don't track subrects. This means that in rare cases + // even a *destination* texture of a transfer may need an immediate texture + // init. + let dst_init_kind = if has_copy_partial_init_tracker_coverage( + copy_size, + destination.mip_level, + &texture.desc, + ) { + MemoryInitKind::NeedsInitializedMemory + } else { + MemoryInitKind::ImplicitlyInitialized + }; + + handle_texture_init( + dst_init_kind, + cmd_buf, + device, + destination, + copy_size, + texture_guard, + ); + Ok(()) +} + +impl<G: GlobalIdentityHandlerFactory> Global<G> { + pub fn command_encoder_copy_buffer_to_buffer<A: HalApi>( + &self, + command_encoder_id: CommandEncoderId, + source: BufferId, + source_offset: BufferAddress, + destination: BufferId, + destination_offset: BufferAddress, + size: BufferAddress, + ) -> Result<(), CopyError> { + profiling::scope!("CommandEncoder::copy_buffer_to_buffer"); + + if source == destination { + return Err(TransferError::SameSourceDestinationBuffer.into()); + } + let hub = A::hub(self); + let mut token = Token::root(); + + let (device_guard, mut token) = hub.devices.read(&mut token); + let (mut cmd_buf_guard, mut token) = hub.command_buffers.write(&mut token); + let cmd_buf = CommandBuffer::get_encoder_mut(&mut *cmd_buf_guard, command_encoder_id)?; + let (buffer_guard, _) = hub.buffers.read(&mut token); + + let device = &device_guard[cmd_buf.device_id.value]; + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf.commands { + list.push(TraceCommand::CopyBufferToBuffer { + src: source, 
+ src_offset: source_offset, + dst: destination, + dst_offset: destination_offset, + size, + }); + } + + let (src_buffer, src_pending) = cmd_buf + .trackers + .buffers + .set_single(&*buffer_guard, source, hal::BufferUses::COPY_SRC) + .ok_or(TransferError::InvalidBuffer(source))?; + let src_raw = src_buffer + .raw + .as_ref() + .ok_or(TransferError::InvalidBuffer(source))?; + if !src_buffer.usage.contains(BufferUsages::COPY_SRC) { + return Err(TransferError::MissingCopySrcUsageFlag.into()); + } + // expecting only a single barrier + let src_barrier = src_pending.map(|pending| pending.into_hal(src_buffer)); + + let (dst_buffer, dst_pending) = cmd_buf + .trackers + .buffers + .set_single(&*buffer_guard, destination, hal::BufferUses::COPY_DST) + .ok_or(TransferError::InvalidBuffer(destination))?; + let dst_raw = dst_buffer + .raw + .as_ref() + .ok_or(TransferError::InvalidBuffer(destination))?; + if !dst_buffer.usage.contains(BufferUsages::COPY_DST) { + return Err(TransferError::MissingCopyDstUsageFlag(Some(destination), None).into()); + } + let dst_barrier = dst_pending.map(|pending| pending.into_hal(dst_buffer)); + + if size % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(TransferError::UnalignedCopySize(size).into()); + } + if source_offset % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(TransferError::UnalignedBufferOffset(source_offset).into()); + } + if destination_offset % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(TransferError::UnalignedBufferOffset(destination_offset).into()); + } + if !device + .downlevel + .flags + .contains(wgt::DownlevelFlags::UNRESTRICTED_INDEX_BUFFER) + && (src_buffer.usage.contains(wgt::BufferUsages::INDEX) + || dst_buffer.usage.contains(wgt::BufferUsages::INDEX)) + { + let forbidden_usages = wgt::BufferUsages::VERTEX + | wgt::BufferUsages::UNIFORM + | wgt::BufferUsages::INDIRECT + | wgt::BufferUsages::STORAGE; + if src_buffer.usage.intersects(forbidden_usages) + || dst_buffer.usage.intersects(forbidden_usages) + { + return 
Err(TransferError::MissingDownlevelFlags(MissingDownlevelFlags( + wgt::DownlevelFlags::UNRESTRICTED_INDEX_BUFFER, + )) + .into()); + } + } + + let source_end_offset = source_offset + size; + let destination_end_offset = destination_offset + size; + if source_end_offset > src_buffer.size { + return Err(TransferError::BufferOverrun { + start_offset: source_offset, + end_offset: source_end_offset, + buffer_size: src_buffer.size, + side: CopySide::Source, + } + .into()); + } + if destination_end_offset > dst_buffer.size { + return Err(TransferError::BufferOverrun { + start_offset: destination_offset, + end_offset: destination_end_offset, + buffer_size: dst_buffer.size, + side: CopySide::Destination, + } + .into()); + } + + if size == 0 { + log::trace!("Ignoring copy_buffer_to_buffer of size 0"); + return Ok(()); + } + + // Make sure source is initialized memory and mark dest as initialized. + cmd_buf + .buffer_memory_init_actions + .extend(dst_buffer.initialization_status.create_action( + destination, + destination_offset..(destination_offset + size), + MemoryInitKind::ImplicitlyInitialized, + )); + cmd_buf + .buffer_memory_init_actions + .extend(src_buffer.initialization_status.create_action( + source, + source_offset..(source_offset + size), + MemoryInitKind::NeedsInitializedMemory, + )); + + let region = hal::BufferCopy { + src_offset: source_offset, + dst_offset: destination_offset, + size: wgt::BufferSize::new(size).unwrap(), + }; + let cmd_buf_raw = cmd_buf.encoder.open(); + unsafe { + cmd_buf_raw.transition_buffers(src_barrier.into_iter().chain(dst_barrier)); + cmd_buf_raw.copy_buffer_to_buffer(src_raw, dst_raw, iter::once(region)); + } + Ok(()) + } + + pub fn command_encoder_copy_buffer_to_texture<A: HalApi>( + &self, + command_encoder_id: CommandEncoderId, + source: &ImageCopyBuffer, + destination: &ImageCopyTexture, + copy_size: &Extent3d, + ) -> Result<(), CopyError> { + profiling::scope!("CommandEncoder::copy_buffer_to_texture"); + + let hub = A::hub(self); 
+ let mut token = Token::root(); + + let (device_guard, mut token) = hub.devices.read(&mut token); + let (mut cmd_buf_guard, mut token) = hub.command_buffers.write(&mut token); + let cmd_buf = CommandBuffer::get_encoder_mut(&mut *cmd_buf_guard, command_encoder_id)?; + let (buffer_guard, mut token) = hub.buffers.read(&mut token); + let (texture_guard, _) = hub.textures.read(&mut token); + + let device = &device_guard[cmd_buf.device_id.value]; + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf.commands { + list.push(TraceCommand::CopyBufferToTexture { + src: source.clone(), + dst: destination.clone(), + size: *copy_size, + }); + } + + if copy_size.width == 0 || copy_size.height == 0 || copy_size.depth_or_array_layers == 0 { + log::trace!("Ignoring copy_buffer_to_texture of size 0"); + return Ok(()); + } + + let dst_texture = texture_guard + .get(destination.texture) + .map_err(|_| TransferError::InvalidTexture(destination.texture))?; + + let (hal_copy_size, array_layer_count) = validate_texture_copy_range( + destination, + &dst_texture.desc, + CopySide::Destination, + copy_size, + )?; + + let (dst_range, dst_base, _) = + extract_texture_selector(destination, copy_size, dst_texture)?; + + // Handle texture init *before* dealing with barrier transitions so we + // have an easier time inserting "immediate-inits" that may be required + // by prior discards in rare cases. 
+ handle_dst_texture_init(cmd_buf, device, destination, copy_size, &texture_guard)?; + + let (src_buffer, src_pending) = cmd_buf + .trackers + .buffers + .set_single(&*buffer_guard, source.buffer, hal::BufferUses::COPY_SRC) + .ok_or(TransferError::InvalidBuffer(source.buffer))?; + let src_raw = src_buffer + .raw + .as_ref() + .ok_or(TransferError::InvalidBuffer(source.buffer))?; + if !src_buffer.usage.contains(BufferUsages::COPY_SRC) { + return Err(TransferError::MissingCopySrcUsageFlag.into()); + } + let src_barrier = src_pending.map(|pending| pending.into_hal(src_buffer)); + + let dst_pending = cmd_buf + .trackers + .textures + .set_single( + dst_texture, + destination.texture, + dst_range, + hal::TextureUses::COPY_DST, + ) + .ok_or(TransferError::InvalidTexture(destination.texture))?; + let dst_raw = dst_texture + .inner + .as_raw() + .ok_or(TransferError::InvalidTexture(destination.texture))?; + if !dst_texture.desc.usage.contains(TextureUsages::COPY_DST) { + return Err( + TransferError::MissingCopyDstUsageFlag(None, Some(destination.texture)).into(), + ); + } + let dst_barrier = dst_pending.map(|pending| pending.into_hal(dst_texture)); + + let format_desc = dst_texture.desc.format.describe(); + let (required_buffer_bytes_in_copy, bytes_per_array_layer) = validate_linear_texture_data( + &source.layout, + dst_texture.desc.format, + src_buffer.size, + CopySide::Source, + format_desc.block_size as BufferAddress, + copy_size, + true, + )?; + + if !conv::is_valid_copy_dst_texture_format(dst_texture.desc.format, destination.aspect) { + return Err(TransferError::CopyToForbiddenTextureFormat { + format: dst_texture.desc.format, + aspect: destination.aspect, + } + .into()); + } + + cmd_buf + .buffer_memory_init_actions + .extend(src_buffer.initialization_status.create_action( + source.buffer, + source.layout.offset..(source.layout.offset + required_buffer_bytes_in_copy), + MemoryInitKind::NeedsInitializedMemory, + )); + + let regions = 
(0..array_layer_count).map(|rel_array_layer| { + let mut texture_base = dst_base.clone(); + texture_base.array_layer += rel_array_layer; + let mut buffer_layout = source.layout; + buffer_layout.offset += rel_array_layer as u64 * bytes_per_array_layer; + hal::BufferTextureCopy { + buffer_layout, + texture_base, + size: hal_copy_size, + } + }); + + let cmd_buf_raw = cmd_buf.encoder.open(); + unsafe { + cmd_buf_raw.transition_textures(dst_barrier.into_iter()); + cmd_buf_raw.transition_buffers(src_barrier.into_iter()); + cmd_buf_raw.copy_buffer_to_texture(src_raw, dst_raw, regions); + } + Ok(()) + } + + pub fn command_encoder_copy_texture_to_buffer<A: HalApi>( + &self, + command_encoder_id: CommandEncoderId, + source: &ImageCopyTexture, + destination: &ImageCopyBuffer, + copy_size: &Extent3d, + ) -> Result<(), CopyError> { + profiling::scope!("CommandEncoder::copy_texture_to_buffer"); + + let hub = A::hub(self); + let mut token = Token::root(); + + let (device_guard, mut token) = hub.devices.read(&mut token); + let (mut cmd_buf_guard, mut token) = hub.command_buffers.write(&mut token); + let cmd_buf = CommandBuffer::get_encoder_mut(&mut *cmd_buf_guard, command_encoder_id)?; + let (buffer_guard, mut token) = hub.buffers.read(&mut token); + let (texture_guard, _) = hub.textures.read(&mut token); + + let device = &device_guard[cmd_buf.device_id.value]; + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf.commands { + list.push(TraceCommand::CopyTextureToBuffer { + src: source.clone(), + dst: destination.clone(), + size: *copy_size, + }); + } + + if copy_size.width == 0 || copy_size.height == 0 || copy_size.depth_or_array_layers == 0 { + log::trace!("Ignoring copy_texture_to_buffer of size 0"); + return Ok(()); + } + + let src_texture = texture_guard + .get(source.texture) + .map_err(|_| TransferError::InvalidTexture(source.texture))?; + + let (hal_copy_size, array_layer_count) = + validate_texture_copy_range(source, &src_texture.desc, CopySide::Source, 
copy_size)?; + + let (src_range, src_base, _) = extract_texture_selector(source, copy_size, src_texture)?; + + // Handle texture init *before* dealing with barrier transitions so we + // have an easier time inserting "immediate-inits" that may be required + // by prior discards in rare cases. + handle_src_texture_init(cmd_buf, device, source, copy_size, &texture_guard)?; + + let src_pending = cmd_buf + .trackers + .textures + .set_single( + src_texture, + source.texture, + src_range, + hal::TextureUses::COPY_SRC, + ) + .ok_or(TransferError::InvalidTexture(source.texture))?; + let src_raw = src_texture + .inner + .as_raw() + .ok_or(TransferError::InvalidTexture(source.texture))?; + if !src_texture.desc.usage.contains(TextureUsages::COPY_SRC) { + return Err(TransferError::MissingCopySrcUsageFlag.into()); + } + if src_texture.desc.sample_count != 1 { + return Err(TransferError::InvalidSampleCount { + sample_count: src_texture.desc.sample_count, + } + .into()); + } + if source.mip_level >= src_texture.desc.mip_level_count { + return Err(TransferError::InvalidMipLevel { + requested: source.mip_level, + count: src_texture.desc.mip_level_count, + } + .into()); + } + let src_barrier = src_pending.map(|pending| pending.into_hal(src_texture)); + + let (dst_buffer, dst_pending) = cmd_buf + .trackers + .buffers + .set_single( + &*buffer_guard, + destination.buffer, + hal::BufferUses::COPY_DST, + ) + .ok_or(TransferError::InvalidBuffer(destination.buffer))?; + let dst_raw = dst_buffer + .raw + .as_ref() + .ok_or(TransferError::InvalidBuffer(destination.buffer))?; + if !dst_buffer.usage.contains(BufferUsages::COPY_DST) { + return Err( + TransferError::MissingCopyDstUsageFlag(Some(destination.buffer), None).into(), + ); + } + let dst_barrier = dst_pending.map(|pending| pending.into_hal(dst_buffer)); + + let format_desc = src_texture.desc.format.describe(); + let (required_buffer_bytes_in_copy, bytes_per_array_layer) = validate_linear_texture_data( + &destination.layout, + 
src_texture.desc.format, + dst_buffer.size, + CopySide::Destination, + format_desc.block_size as BufferAddress, + copy_size, + true, + )?; + + if !conv::is_valid_copy_src_texture_format(src_texture.desc.format, source.aspect) { + return Err(TransferError::CopyFromForbiddenTextureFormat { + format: src_texture.desc.format, + aspect: source.aspect, + } + .into()); + } + + if format_desc.sample_type == wgt::TextureSampleType::Depth + && !device + .downlevel + .flags + .contains(wgt::DownlevelFlags::DEPTH_TEXTURE_AND_BUFFER_COPIES) + { + return Err(TransferError::MissingDownlevelFlags(MissingDownlevelFlags( + wgt::DownlevelFlags::DEPTH_TEXTURE_AND_BUFFER_COPIES, + )) + .into()); + } + + cmd_buf + .buffer_memory_init_actions + .extend(dst_buffer.initialization_status.create_action( + destination.buffer, + destination.layout.offset + ..(destination.layout.offset + required_buffer_bytes_in_copy), + MemoryInitKind::ImplicitlyInitialized, + )); + + let regions = (0..array_layer_count).map(|rel_array_layer| { + let mut texture_base = src_base.clone(); + texture_base.array_layer += rel_array_layer; + let mut buffer_layout = destination.layout; + buffer_layout.offset += rel_array_layer as u64 * bytes_per_array_layer; + hal::BufferTextureCopy { + buffer_layout, + texture_base, + size: hal_copy_size, + } + }); + let cmd_buf_raw = cmd_buf.encoder.open(); + unsafe { + cmd_buf_raw.transition_buffers(dst_barrier.into_iter()); + cmd_buf_raw.transition_textures(src_barrier.into_iter()); + cmd_buf_raw.copy_texture_to_buffer( + src_raw, + hal::TextureUses::COPY_SRC, + dst_raw, + regions, + ); + } + Ok(()) + } + + pub fn command_encoder_copy_texture_to_texture<A: HalApi>( + &self, + command_encoder_id: CommandEncoderId, + source: &ImageCopyTexture, + destination: &ImageCopyTexture, + copy_size: &Extent3d, + ) -> Result<(), CopyError> { + profiling::scope!("CommandEncoder::copy_texture_to_texture"); + + let hub = A::hub(self); + let mut token = Token::root(); + + let (device_guard, mut 
token) = hub.devices.read(&mut token); + let (mut cmd_buf_guard, mut token) = hub.command_buffers.write(&mut token); + let cmd_buf = CommandBuffer::get_encoder_mut(&mut *cmd_buf_guard, command_encoder_id)?; + let (_, mut token) = hub.buffers.read(&mut token); // skip token + let (texture_guard, _) = hub.textures.read(&mut token); + + let device = &device_guard[cmd_buf.device_id.value]; + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf.commands { + list.push(TraceCommand::CopyTextureToTexture { + src: source.clone(), + dst: destination.clone(), + size: *copy_size, + }); + } + + if copy_size.width == 0 || copy_size.height == 0 || copy_size.depth_or_array_layers == 0 { + log::trace!("Ignoring copy_texture_to_texture of size 0"); + return Ok(()); + } + + let src_texture = texture_guard + .get(source.texture) + .map_err(|_| TransferError::InvalidTexture(source.texture))?; + let dst_texture = texture_guard + .get(destination.texture) + .map_err(|_| TransferError::InvalidTexture(source.texture))?; + + // src and dst texture format must be the same. 
+ let src_format = src_texture.desc.format; + let dst_format = dst_texture.desc.format; + if src_format != dst_format { + return Err(TransferError::MismatchedTextureFormats { + src_format, + dst_format, + } + .into()); + } + + let (src_copy_size, array_layer_count) = + validate_texture_copy_range(source, &src_texture.desc, CopySide::Source, copy_size)?; + let (dst_copy_size, _) = validate_texture_copy_range( + destination, + &dst_texture.desc, + CopySide::Destination, + copy_size, + )?; + + let (src_range, src_tex_base, _) = + extract_texture_selector(source, copy_size, src_texture)?; + let (dst_range, dst_tex_base, _) = + extract_texture_selector(destination, copy_size, dst_texture)?; + if src_tex_base.aspect != dst_tex_base.aspect { + return Err(TransferError::MismatchedAspects.into()); + } + + // Handle texture init *before* dealing with barrier transitions so we + // have an easier time inserting "immediate-inits" that may be required + // by prior discards in rare cases. + handle_src_texture_init(cmd_buf, device, source, copy_size, &texture_guard)?; + handle_dst_texture_init(cmd_buf, device, destination, copy_size, &texture_guard)?; + + let src_pending = cmd_buf + .trackers + .textures + .set_single( + src_texture, + source.texture, + src_range, + hal::TextureUses::COPY_SRC, + ) + .ok_or(TransferError::InvalidTexture(source.texture))?; + let src_raw = src_texture + .inner + .as_raw() + .ok_or(TransferError::InvalidTexture(source.texture))?; + if !src_texture.desc.usage.contains(TextureUsages::COPY_SRC) { + return Err(TransferError::MissingCopySrcUsageFlag.into()); + } + + //TODO: try to avoid this the collection. It's needed because both + // `src_pending` and `dst_pending` try to hold `trackers.textures` mutably. 
+ let mut barriers: ArrayVec<_, 2> = src_pending + .map(|pending| pending.into_hal(src_texture)) + .into_iter() + .collect(); + + let dst_pending = cmd_buf + .trackers + .textures + .set_single( + dst_texture, + destination.texture, + dst_range, + hal::TextureUses::COPY_DST, + ) + .ok_or(TransferError::InvalidTexture(destination.texture))?; + let dst_raw = dst_texture + .inner + .as_raw() + .ok_or(TransferError::InvalidTexture(destination.texture))?; + if !dst_texture.desc.usage.contains(TextureUsages::COPY_DST) { + return Err( + TransferError::MissingCopyDstUsageFlag(None, Some(destination.texture)).into(), + ); + } + + barriers.extend(dst_pending.map(|pending| pending.into_hal(dst_texture))); + + let hal_copy_size = hal::CopyExtent { + width: src_copy_size.width.min(dst_copy_size.width), + height: src_copy_size.height.min(dst_copy_size.height), + depth: src_copy_size.depth.min(dst_copy_size.depth), + }; + let regions = (0..array_layer_count).map(|rel_array_layer| { + let mut src_base = src_tex_base.clone(); + let mut dst_base = dst_tex_base.clone(); + src_base.array_layer += rel_array_layer; + dst_base.array_layer += rel_array_layer; + hal::TextureCopy { + src_base, + dst_base, + size: hal_copy_size, + } + }); + let cmd_buf_raw = cmd_buf.encoder.open(); + unsafe { + cmd_buf_raw.transition_textures(barriers.into_iter()); + cmd_buf_raw.copy_texture_to_texture( + src_raw, + hal::TextureUses::COPY_SRC, + dst_raw, + regions, + ); + } + Ok(()) + } +} diff --git a/third_party/rust/wgpu-core/src/conv.rs b/third_party/rust/wgpu-core/src/conv.rs new file mode 100644 index 0000000000..e83c0b6c79 --- /dev/null +++ b/third_party/rust/wgpu-core/src/conv.rs @@ -0,0 +1,169 @@ +use crate::resource; + +pub fn is_power_of_two_u16(val: u16) -> bool { + val != 0 && (val & (val - 1)) == 0 +} + +pub fn is_power_of_two_u32(val: u32) -> bool { + val != 0 && (val & (val - 1)) == 0 +} + +pub fn is_valid_copy_src_texture_format( + format: wgt::TextureFormat, + aspect: wgt::TextureAspect, +) 
-> bool { + use wgt::TextureAspect as Ta; + use wgt::TextureFormat as Tf; + match (format, aspect) { + (Tf::Depth24Plus, _) | (Tf::Depth24PlusStencil8, Ta::DepthOnly) => false, + _ => true, + } +} + +pub fn is_valid_copy_dst_texture_format( + format: wgt::TextureFormat, + aspect: wgt::TextureAspect, +) -> bool { + use wgt::TextureAspect as Ta; + use wgt::TextureFormat as Tf; + match (format, aspect) { + (Tf::Depth24Plus | Tf::Depth32Float, _) + | (Tf::Depth24PlusStencil8 | Tf::Depth32FloatStencil8, Ta::DepthOnly) => false, + _ => true, + } +} + +pub fn map_buffer_usage(usage: wgt::BufferUsages) -> hal::BufferUses { + let mut u = hal::BufferUses::empty(); + u.set( + hal::BufferUses::MAP_READ, + usage.contains(wgt::BufferUsages::MAP_READ), + ); + u.set( + hal::BufferUses::MAP_WRITE, + usage.contains(wgt::BufferUsages::MAP_WRITE), + ); + u.set( + hal::BufferUses::COPY_SRC, + usage.contains(wgt::BufferUsages::COPY_SRC), + ); + u.set( + hal::BufferUses::COPY_DST, + usage.contains(wgt::BufferUsages::COPY_DST), + ); + u.set( + hal::BufferUses::INDEX, + usage.contains(wgt::BufferUsages::INDEX), + ); + u.set( + hal::BufferUses::VERTEX, + usage.contains(wgt::BufferUsages::VERTEX), + ); + u.set( + hal::BufferUses::UNIFORM, + usage.contains(wgt::BufferUsages::UNIFORM), + ); + u.set( + hal::BufferUses::STORAGE_READ | hal::BufferUses::STORAGE_READ_WRITE, + usage.contains(wgt::BufferUsages::STORAGE), + ); + u.set( + hal::BufferUses::INDIRECT, + usage.contains(wgt::BufferUsages::INDIRECT), + ); + u +} + +pub fn map_texture_usage( + usage: wgt::TextureUsages, + aspect: hal::FormatAspects, +) -> hal::TextureUses { + let mut u = hal::TextureUses::empty(); + u.set( + hal::TextureUses::COPY_SRC, + usage.contains(wgt::TextureUsages::COPY_SRC), + ); + u.set( + hal::TextureUses::COPY_DST, + usage.contains(wgt::TextureUsages::COPY_DST), + ); + u.set( + hal::TextureUses::RESOURCE, + usage.contains(wgt::TextureUsages::TEXTURE_BINDING), + ); + u.set( + hal::TextureUses::STORAGE_READ | 
hal::TextureUses::STORAGE_READ_WRITE, + usage.contains(wgt::TextureUsages::STORAGE_BINDING), + ); + let is_color = aspect.contains(hal::FormatAspects::COLOR); + u.set( + hal::TextureUses::COLOR_TARGET, + usage.contains(wgt::TextureUsages::RENDER_ATTACHMENT) && is_color, + ); + u.set( + hal::TextureUses::DEPTH_STENCIL_READ | hal::TextureUses::DEPTH_STENCIL_WRITE, + usage.contains(wgt::TextureUsages::RENDER_ATTACHMENT) && !is_color, + ); + u +} + +pub fn check_texture_dimension_size( + dimension: wgt::TextureDimension, + wgt::Extent3d { + width, + height, + depth_or_array_layers, + }: wgt::Extent3d, + sample_size: u32, + limits: &wgt::Limits, +) -> Result<(), resource::TextureDimensionError> { + use resource::{TextureDimensionError as Tde, TextureErrorDimension as Ted}; + use wgt::TextureDimension::*; + + let (extent_limits, sample_limit) = match dimension { + D1 => ([limits.max_texture_dimension_1d, 1, 1], 1), + D2 => ( + [ + limits.max_texture_dimension_2d, + limits.max_texture_dimension_2d, + limits.max_texture_array_layers, + ], + 32, + ), + D3 => ( + [ + limits.max_texture_dimension_3d, + limits.max_texture_dimension_3d, + limits.max_texture_dimension_3d, + ], + 1, + ), + }; + + for (&dim, (&given, &limit)) in [Ted::X, Ted::Y, Ted::Z].iter().zip( + [width, height, depth_or_array_layers] + .iter() + .zip(extent_limits.iter()), + ) { + if given == 0 { + return Err(Tde::Zero(dim)); + } + if given > limit { + return Err(Tde::LimitExceeded { dim, given, limit }); + } + } + if sample_size == 0 || sample_size > sample_limit || !is_power_of_two_u32(sample_size) { + return Err(Tde::InvalidSampleCount(sample_size)); + } + + Ok(()) +} + +pub fn bind_group_layout_flags(features: wgt::Features) -> hal::BindGroupLayoutFlags { + let mut flags = hal::BindGroupLayoutFlags::empty(); + flags.set( + hal::BindGroupLayoutFlags::PARTIALLY_BOUND, + features.contains(wgt::Features::PARTIALLY_BOUND_BINDING_ARRAY), + ); + flags +} diff --git a/third_party/rust/wgpu-core/src/device/life.rs 
b/third_party/rust/wgpu-core/src/device/life.rs new file mode 100644 index 0000000000..605aea3dab --- /dev/null +++ b/third_party/rust/wgpu-core/src/device/life.rs @@ -0,0 +1,910 @@ +#[cfg(feature = "trace")] +use crate::device::trace; +use crate::{ + device::{ + queue::{EncoderInFlight, SubmittedWorkDoneClosure, TempResource}, + DeviceError, + }, + hub::{GlobalIdentityHandlerFactory, HalApi, Hub, Token}, + id, resource, + track::{BindGroupStates, RenderBundleScope, Tracker}, + RefCount, Stored, SubmissionIndex, +}; +use smallvec::SmallVec; + +use hal::Device as _; +use parking_lot::Mutex; +use thiserror::Error; + +use std::mem; + +/// A struct that keeps lists of resources that are no longer needed by the user. +#[derive(Debug, Default)] +pub(super) struct SuspectedResources { + pub(super) buffers: Vec<id::Valid<id::BufferId>>, + pub(super) textures: Vec<id::Valid<id::TextureId>>, + pub(super) texture_views: Vec<id::Valid<id::TextureViewId>>, + pub(super) samplers: Vec<id::Valid<id::SamplerId>>, + pub(super) bind_groups: Vec<id::Valid<id::BindGroupId>>, + pub(super) compute_pipelines: Vec<id::Valid<id::ComputePipelineId>>, + pub(super) render_pipelines: Vec<id::Valid<id::RenderPipelineId>>, + pub(super) bind_group_layouts: Vec<id::Valid<id::BindGroupLayoutId>>, + pub(super) pipeline_layouts: Vec<Stored<id::PipelineLayoutId>>, + pub(super) render_bundles: Vec<id::Valid<id::RenderBundleId>>, + pub(super) query_sets: Vec<id::Valid<id::QuerySetId>>, +} + +impl SuspectedResources { + pub(super) fn clear(&mut self) { + self.buffers.clear(); + self.textures.clear(); + self.texture_views.clear(); + self.samplers.clear(); + self.bind_groups.clear(); + self.compute_pipelines.clear(); + self.render_pipelines.clear(); + self.bind_group_layouts.clear(); + self.pipeline_layouts.clear(); + self.render_bundles.clear(); + self.query_sets.clear(); + } + + pub(super) fn extend(&mut self, other: &Self) { + self.buffers.extend_from_slice(&other.buffers); + 
self.textures.extend_from_slice(&other.textures); + self.texture_views.extend_from_slice(&other.texture_views); + self.samplers.extend_from_slice(&other.samplers); + self.bind_groups.extend_from_slice(&other.bind_groups); + self.compute_pipelines + .extend_from_slice(&other.compute_pipelines); + self.render_pipelines + .extend_from_slice(&other.render_pipelines); + self.bind_group_layouts + .extend_from_slice(&other.bind_group_layouts); + self.pipeline_layouts + .extend_from_slice(&other.pipeline_layouts); + self.render_bundles.extend_from_slice(&other.render_bundles); + self.query_sets.extend_from_slice(&other.query_sets); + } + + pub(super) fn add_render_bundle_scope<A: HalApi>(&mut self, trackers: &RenderBundleScope<A>) { + self.buffers.extend(trackers.buffers.used()); + self.textures.extend(trackers.textures.used()); + self.bind_groups.extend(trackers.bind_groups.used()); + self.render_pipelines + .extend(trackers.render_pipelines.used()); + self.query_sets.extend(trackers.query_sets.used()); + } + + pub(super) fn add_bind_group_states<A: HalApi>(&mut self, trackers: &BindGroupStates<A>) { + self.buffers.extend(trackers.buffers.used()); + self.textures.extend(trackers.textures.used()); + self.texture_views.extend(trackers.views.used()); + self.samplers.extend(trackers.samplers.used()); + } +} + +/// Raw backend resources that should be freed shortly. 
+#[derive(Debug)] +struct NonReferencedResources<A: hal::Api> { + buffers: Vec<A::Buffer>, + textures: Vec<A::Texture>, + texture_views: Vec<A::TextureView>, + samplers: Vec<A::Sampler>, + bind_groups: Vec<A::BindGroup>, + compute_pipes: Vec<A::ComputePipeline>, + render_pipes: Vec<A::RenderPipeline>, + bind_group_layouts: Vec<A::BindGroupLayout>, + pipeline_layouts: Vec<A::PipelineLayout>, + query_sets: Vec<A::QuerySet>, +} + +impl<A: hal::Api> NonReferencedResources<A> { + fn new() -> Self { + Self { + buffers: Vec::new(), + textures: Vec::new(), + texture_views: Vec::new(), + samplers: Vec::new(), + bind_groups: Vec::new(), + compute_pipes: Vec::new(), + render_pipes: Vec::new(), + bind_group_layouts: Vec::new(), + pipeline_layouts: Vec::new(), + query_sets: Vec::new(), + } + } + + fn extend(&mut self, other: Self) { + self.buffers.extend(other.buffers); + self.textures.extend(other.textures); + self.texture_views.extend(other.texture_views); + self.samplers.extend(other.samplers); + self.bind_groups.extend(other.bind_groups); + self.compute_pipes.extend(other.compute_pipes); + self.render_pipes.extend(other.render_pipes); + self.query_sets.extend(other.query_sets); + assert!(other.bind_group_layouts.is_empty()); + assert!(other.pipeline_layouts.is_empty()); + } + + unsafe fn clean(&mut self, device: &A::Device) { + if !self.buffers.is_empty() { + profiling::scope!("destroy_buffers"); + for raw in self.buffers.drain(..) { + unsafe { device.destroy_buffer(raw) }; + } + } + if !self.textures.is_empty() { + profiling::scope!("destroy_textures"); + for raw in self.textures.drain(..) { + unsafe { device.destroy_texture(raw) }; + } + } + if !self.texture_views.is_empty() { + profiling::scope!("destroy_texture_views"); + for raw in self.texture_views.drain(..) { + unsafe { device.destroy_texture_view(raw) }; + } + } + if !self.samplers.is_empty() { + profiling::scope!("destroy_samplers"); + for raw in self.samplers.drain(..) 
{ + unsafe { device.destroy_sampler(raw) }; + } + } + if !self.bind_groups.is_empty() { + profiling::scope!("destroy_bind_groups"); + for raw in self.bind_groups.drain(..) { + unsafe { device.destroy_bind_group(raw) }; + } + } + if !self.compute_pipes.is_empty() { + profiling::scope!("destroy_compute_pipelines"); + for raw in self.compute_pipes.drain(..) { + unsafe { device.destroy_compute_pipeline(raw) }; + } + } + if !self.render_pipes.is_empty() { + profiling::scope!("destroy_render_pipelines"); + for raw in self.render_pipes.drain(..) { + unsafe { device.destroy_render_pipeline(raw) }; + } + } + if !self.bind_group_layouts.is_empty() { + profiling::scope!("destroy_bind_group_layouts"); + for raw in self.bind_group_layouts.drain(..) { + unsafe { device.destroy_bind_group_layout(raw) }; + } + } + if !self.pipeline_layouts.is_empty() { + profiling::scope!("destroy_pipeline_layouts"); + for raw in self.pipeline_layouts.drain(..) { + unsafe { device.destroy_pipeline_layout(raw) }; + } + } + if !self.query_sets.is_empty() { + profiling::scope!("destroy_query_sets"); + for raw in self.query_sets.drain(..) { + unsafe { device.destroy_query_set(raw) }; + } + } + } +} + +/// Resources used by a queue submission, and work to be done once it completes. +struct ActiveSubmission<A: hal::Api> { + /// The index of the submission we track. + /// + /// When `Device::fence`'s value is greater than or equal to this, our queue + /// submission has completed. + index: SubmissionIndex, + + /// Resources to be freed once this queue submission has completed. + /// + /// When the device is polled, for completed submissions, + /// `triage_submissions` merges these into + /// `LifetimeTracker::free_resources`. From there, + /// `LifetimeTracker::cleanup` passes them to the hal to be freed. 
/// Errors that can be reported while waiting on a device.
///
/// NOTE(review): the functions returning this type are outside this chunk;
/// the variant descriptions below are taken from the error messages.
#[derive(Clone, Debug, Error)]
pub enum WaitIdleError {
    /// A wrapped [`DeviceError`]; its own message is displayed unchanged.
    #[error(transparent)]
    Device(#[from] DeviceError),
    /// The submission index that was waited on belongs to a different device
    /// (first field) than the one that was polled (second field).
    #[error("Tried to wait using a submission index from the wrong device. Submission index is from device {0:?}. Called poll on device {1:?}.")]
    WrongSubmissionIndex(id::QueueId, id::DeviceId),
    /// The GPU appears to have stopped making progress.
    #[error("GPU got stuck :(")]
    StuckGpu,
}
+/// +/// 3) `handle_mapping` drains `self.ready_to_map` and actually maps the +/// buffers, collecting a list of notification closures to call. But any +/// buffers that were dropped by the user get moved to +/// `self.free_resources`. +/// +/// 4) `cleanup` frees everything in `free_resources`. +/// +/// Only `self.mapped` holds a `RefCount` for the buffer; it is dropped by +/// `triage_mapped`. +pub(super) struct LifetimeTracker<A: hal::Api> { + /// Resources that the user has requested be mapped, but which are used by + /// queue submissions still in flight. + mapped: Vec<Stored<id::BufferId>>, + + /// Buffers can be used in a submission that is yet to be made, by the + /// means of `write_buffer()`, so we have a special place for them. + pub future_suspected_buffers: Vec<Stored<id::BufferId>>, + + /// Textures can be used in the upcoming submission by `write_texture`. + pub future_suspected_textures: Vec<Stored<id::TextureId>>, + + /// Resources whose user handle has died (i.e. drop/destroy has been called) + /// and will likely be ready for destruction soon. + pub suspected_resources: SuspectedResources, + + /// Resources used by queue submissions still in flight. One entry per + /// submission, with older submissions appearing before younger. + /// + /// Entries are added by `track_submission` and drained by + /// `LifetimeTracker::triage_submissions`. Lots of methods contribute data + /// to particular entries. + active: Vec<ActiveSubmission<A>>, + + /// Raw backend resources that are neither referenced nor used. + /// + /// These are freed by `LifeTracker::cleanup`, which is called from periodic + /// maintenance functions like `Global::device_poll`, and when a device is + /// destroyed. + free_resources: NonReferencedResources<A>, + + /// Buffers the user has asked us to map, and which are not used by any + /// queue submission still in flight. 
+ ready_to_map: Vec<id::Valid<id::BufferId>>, +} + +impl<A: hal::Api> LifetimeTracker<A> { + pub fn new() -> Self { + Self { + mapped: Vec::new(), + future_suspected_buffers: Vec::new(), + future_suspected_textures: Vec::new(), + suspected_resources: SuspectedResources::default(), + active: Vec::new(), + free_resources: NonReferencedResources::new(), + ready_to_map: Vec::new(), + } + } + + /// Return true if there are no queue submissions still in flight. + pub fn queue_empty(&self) -> bool { + self.active.is_empty() + } + + /// Start tracking resources associated with a new queue submission. + pub fn track_submission( + &mut self, + index: SubmissionIndex, + temp_resources: impl Iterator<Item = TempResource<A>>, + encoders: Vec<EncoderInFlight<A>>, + ) { + let mut last_resources = NonReferencedResources::new(); + for res in temp_resources { + match res { + TempResource::Buffer(raw) => last_resources.buffers.push(raw), + TempResource::Texture(raw, views) => { + last_resources.textures.push(raw); + last_resources.texture_views.extend(views); + } + } + } + + self.active.push(ActiveSubmission { + index, + last_resources, + mapped: Vec::new(), + encoders, + work_done_closures: SmallVec::new(), + }); + } + + pub fn post_submit(&mut self) { + self.suspected_resources.buffers.extend( + self.future_suspected_buffers + .drain(..) + .map(|stored| stored.value), + ); + self.suspected_resources.textures.extend( + self.future_suspected_textures + .drain(..) + .map(|stored| stored.value), + ); + } + + pub(crate) fn map(&mut self, value: id::Valid<id::BufferId>, ref_count: RefCount) { + self.mapped.push(Stored { value, ref_count }); + } + + /// Sort out the consequences of completed submissions. + /// + /// Assume that all submissions up through `last_done` have completed. + /// + /// - Buffers used by those submissions are now ready to map, if + /// requested. 
Add any buffers in the submission's [`mapped`] list to + /// [`self.ready_to_map`], where [`LifetimeTracker::handle_mapping`] will find + /// them. + /// + /// - Resources whose final use was in those submissions are now ready to + /// free. Add any resources in the submission's [`last_resources`] table + /// to [`self.free_resources`], where [`LifetimeTracker::cleanup`] will find + /// them. + /// + /// Return a list of [`SubmittedWorkDoneClosure`]s to run. + /// + /// [`mapped`]: ActiveSubmission::mapped + /// [`self.ready_to_map`]: LifetimeTracker::ready_to_map + /// [`last_resources`]: ActiveSubmission::last_resources + /// [`self.free_resources`]: LifetimeTracker::free_resources + /// [`SubmittedWorkDoneClosure`]: crate::device::queue::SubmittedWorkDoneClosure + #[must_use] + pub fn triage_submissions( + &mut self, + last_done: SubmissionIndex, + command_allocator: &Mutex<super::CommandAllocator<A>>, + ) -> SmallVec<[SubmittedWorkDoneClosure; 1]> { + profiling::scope!("triage_submissions"); + + //TODO: enable when `is_sorted_by_key` is stable + //debug_assert!(self.active.is_sorted_by_key(|a| a.index)); + let done_count = self + .active + .iter() + .position(|a| a.index > last_done) + .unwrap_or(self.active.len()); + + let mut work_done_closures = SmallVec::new(); + for a in self.active.drain(..done_count) { + log::trace!("Active submission {} is done", a.index); + self.free_resources.extend(a.last_resources); + self.ready_to_map.extend(a.mapped); + for encoder in a.encoders { + let raw = unsafe { encoder.land() }; + command_allocator.lock().release_encoder(raw); + } + work_done_closures.extend(a.work_done_closures); + } + work_done_closures + } + + pub fn cleanup(&mut self, device: &A::Device) { + profiling::scope!("LifetimeTracker::cleanup"); + unsafe { + self.free_resources.clean(device); + } + } + + pub fn schedule_resource_destruction( + &mut self, + temp_resource: TempResource<A>, + last_submit_index: SubmissionIndex, + ) { + let resources = self + 
.active + .iter_mut() + .find(|a| a.index == last_submit_index) + .map_or(&mut self.free_resources, |a| &mut a.last_resources); + match temp_resource { + TempResource::Buffer(raw) => resources.buffers.push(raw), + TempResource::Texture(raw, views) => { + resources.texture_views.extend(views); + resources.textures.push(raw); + } + } + } + + pub fn add_work_done_closure( + &mut self, + closure: SubmittedWorkDoneClosure, + ) -> Option<SubmittedWorkDoneClosure> { + match self.active.last_mut() { + Some(active) => { + active.work_done_closures.push(closure); + None + } + // Note: we can't immediately invoke the closure, since it assumes + // nothing is currently locked in the hubs. + None => Some(closure), + } + } +} + +impl<A: HalApi> LifetimeTracker<A> { + /// Identify resources to free, according to `trackers` and `self.suspected_resources`. + /// + /// Given `trackers`, the [`Tracker`] belonging to same [`Device`] as + /// `self`, and `hub`, the [`Hub`] to which that `Device` belongs: + /// + /// Remove from `trackers` each resource mentioned in + /// [`self.suspected_resources`]. If `trackers` held the final reference to + /// that resource, add it to the appropriate free list, to be destroyed by + /// the hal: + /// + /// - Add resources used by queue submissions still in flight to the + /// [`last_resources`] table of the last such submission's entry in + /// [`self.active`]. When that submission has finished execution. the + /// [`triage_submissions`] method will move them to + /// [`self.free_resources`]. + /// + /// - Add resources that can be freed right now to [`self.free_resources`] + /// directly. [`LifetimeTracker::cleanup`] will take care of them as + /// part of this poll. + /// + /// ## Entrained resources + /// + /// This function finds resources that are used only by other resources + /// ready to be freed, and adds those to the free lists as well. 
For + /// example, if there's some texture `T` used only by some texture view + /// `TV`, then if `TV` can be freed, `T` gets added to the free lists too. + /// + /// Since `wgpu-core` resource ownership patterns are acyclic, we can visit + /// each type that can be owned after all types that could possibly own + /// it. This way, we can detect all free-able objects in a single pass, + /// simply by starting with types that are roots of the ownership DAG (like + /// render bundles) and working our way towards leaf types (like buffers). + /// + /// [`Device`]: super::Device + /// [`self.suspected_resources`]: LifetimeTracker::suspected_resources + /// [`last_resources`]: ActiveSubmission::last_resources + /// [`self.active`]: LifetimeTracker::active + /// [`triage_submissions`]: LifetimeTracker::triage_submissions + /// [`self.free_resources`]: LifetimeTracker::free_resources + pub(super) fn triage_suspected<G: GlobalIdentityHandlerFactory>( + &mut self, + hub: &Hub<A, G>, + trackers: &Mutex<Tracker<A>>, + #[cfg(feature = "trace")] trace: Option<&Mutex<trace::Trace>>, + token: &mut Token<super::Device<A>>, + ) { + profiling::scope!("triage_suspected"); + + if !self.suspected_resources.render_bundles.is_empty() { + let (mut guard, _) = hub.render_bundles.write(token); + let mut trackers = trackers.lock(); + + while let Some(id) = self.suspected_resources.render_bundles.pop() { + if trackers.bundles.remove_abandoned(id) { + log::debug!("Bundle {:?} will be destroyed", id); + #[cfg(feature = "trace")] + if let Some(t) = trace { + t.lock().add(trace::Action::DestroyRenderBundle(id.0)); + } + + if let Some(res) = hub.render_bundles.unregister_locked(id.0, &mut *guard) { + self.suspected_resources.add_render_bundle_scope(&res.used); + } + } + } + } + + if !self.suspected_resources.bind_groups.is_empty() { + let (mut guard, _) = hub.bind_groups.write(token); + let mut trackers = trackers.lock(); + + while let Some(id) = self.suspected_resources.bind_groups.pop() { + if 
trackers.bind_groups.remove_abandoned(id) { + log::debug!("Bind group {:?} will be destroyed", id); + #[cfg(feature = "trace")] + if let Some(t) = trace { + t.lock().add(trace::Action::DestroyBindGroup(id.0)); + } + + if let Some(res) = hub.bind_groups.unregister_locked(id.0, &mut *guard) { + self.suspected_resources.add_bind_group_states(&res.used); + + self.suspected_resources + .bind_group_layouts + .push(res.layout_id); + + let submit_index = res.life_guard.life_count(); + self.active + .iter_mut() + .find(|a| a.index == submit_index) + .map_or(&mut self.free_resources, |a| &mut a.last_resources) + .bind_groups + .push(res.raw); + } + } + } + } + + if !self.suspected_resources.texture_views.is_empty() { + let (mut guard, _) = hub.texture_views.write(token); + let mut trackers = trackers.lock(); + + let mut list = mem::take(&mut self.suspected_resources.texture_views); + for id in list.drain(..) { + if trackers.views.remove_abandoned(id) { + log::debug!("Texture view {:?} will be destroyed", id); + #[cfg(feature = "trace")] + if let Some(t) = trace { + t.lock().add(trace::Action::DestroyTextureView(id.0)); + } + + if let Some(res) = hub.texture_views.unregister_locked(id.0, &mut *guard) { + self.suspected_resources.textures.push(res.parent_id.value); + let submit_index = res.life_guard.life_count(); + self.active + .iter_mut() + .find(|a| a.index == submit_index) + .map_or(&mut self.free_resources, |a| &mut a.last_resources) + .texture_views + .push(res.raw); + } + } + } + self.suspected_resources.texture_views = list; + } + + if !self.suspected_resources.textures.is_empty() { + let (mut guard, _) = hub.textures.write(token); + let mut trackers = trackers.lock(); + + for id in self.suspected_resources.textures.drain(..) 
{ + if trackers.textures.remove_abandoned(id) { + log::debug!("Texture {:?} will be destroyed", id); + #[cfg(feature = "trace")] + if let Some(t) = trace { + t.lock().add(trace::Action::DestroyTexture(id.0)); + } + + if let Some(res) = hub.textures.unregister_locked(id.0, &mut *guard) { + let submit_index = res.life_guard.life_count(); + let raw = match res.inner { + resource::TextureInner::Native { raw: Some(raw) } => raw, + _ => continue, + }; + let non_referenced_resources = self + .active + .iter_mut() + .find(|a| a.index == submit_index) + .map_or(&mut self.free_resources, |a| &mut a.last_resources); + + non_referenced_resources.textures.push(raw); + if let resource::TextureClearMode::RenderPass { clear_views, .. } = + res.clear_mode + { + non_referenced_resources + .texture_views + .extend(clear_views.into_iter()); + } + } + } + } + } + + if !self.suspected_resources.samplers.is_empty() { + let (mut guard, _) = hub.samplers.write(token); + let mut trackers = trackers.lock(); + + for id in self.suspected_resources.samplers.drain(..) { + if trackers.samplers.remove_abandoned(id) { + log::debug!("Sampler {:?} will be destroyed", id); + #[cfg(feature = "trace")] + if let Some(t) = trace { + t.lock().add(trace::Action::DestroySampler(id.0)); + } + + if let Some(res) = hub.samplers.unregister_locked(id.0, &mut *guard) { + let submit_index = res.life_guard.life_count(); + self.active + .iter_mut() + .find(|a| a.index == submit_index) + .map_or(&mut self.free_resources, |a| &mut a.last_resources) + .samplers + .push(res.raw); + } + } + } + } + + if !self.suspected_resources.buffers.is_empty() { + let (mut guard, _) = hub.buffers.write(token); + let mut trackers = trackers.lock(); + + for id in self.suspected_resources.buffers.drain(..) 
{ + if trackers.buffers.remove_abandoned(id) { + log::debug!("Buffer {:?} will be destroyed", id); + #[cfg(feature = "trace")] + if let Some(t) = trace { + t.lock().add(trace::Action::DestroyBuffer(id.0)); + } + + if let Some(res) = hub.buffers.unregister_locked(id.0, &mut *guard) { + let submit_index = res.life_guard.life_count(); + if let resource::BufferMapState::Init { stage_buffer, .. } = res.map_state { + self.free_resources.buffers.push(stage_buffer); + } + self.active + .iter_mut() + .find(|a| a.index == submit_index) + .map_or(&mut self.free_resources, |a| &mut a.last_resources) + .buffers + .extend(res.raw); + } + } + } + } + + if !self.suspected_resources.compute_pipelines.is_empty() { + let (mut guard, _) = hub.compute_pipelines.write(token); + let mut trackers = trackers.lock(); + + for id in self.suspected_resources.compute_pipelines.drain(..) { + if trackers.compute_pipelines.remove_abandoned(id) { + log::debug!("Compute pipeline {:?} will be destroyed", id); + #[cfg(feature = "trace")] + if let Some(t) = trace { + t.lock().add(trace::Action::DestroyComputePipeline(id.0)); + } + + if let Some(res) = hub.compute_pipelines.unregister_locked(id.0, &mut *guard) { + let submit_index = res.life_guard.life_count(); + self.active + .iter_mut() + .find(|a| a.index == submit_index) + .map_or(&mut self.free_resources, |a| &mut a.last_resources) + .compute_pipes + .push(res.raw); + } + } + } + } + + if !self.suspected_resources.render_pipelines.is_empty() { + let (mut guard, _) = hub.render_pipelines.write(token); + let mut trackers = trackers.lock(); + + for id in self.suspected_resources.render_pipelines.drain(..) 
{ + if trackers.render_pipelines.remove_abandoned(id) { + log::debug!("Render pipeline {:?} will be destroyed", id); + #[cfg(feature = "trace")] + if let Some(t) = trace { + t.lock().add(trace::Action::DestroyRenderPipeline(id.0)); + } + + if let Some(res) = hub.render_pipelines.unregister_locked(id.0, &mut *guard) { + let submit_index = res.life_guard.life_count(); + self.active + .iter_mut() + .find(|a| a.index == submit_index) + .map_or(&mut self.free_resources, |a| &mut a.last_resources) + .render_pipes + .push(res.raw); + } + } + } + } + + if !self.suspected_resources.pipeline_layouts.is_empty() { + let (mut guard, _) = hub.pipeline_layouts.write(token); + + for Stored { + value: id, + ref_count, + } in self.suspected_resources.pipeline_layouts.drain(..) + { + //Note: this has to happen after all the suspected pipelines are destroyed + if ref_count.load() == 1 { + log::debug!("Pipeline layout {:?} will be destroyed", id); + #[cfg(feature = "trace")] + if let Some(t) = trace { + t.lock().add(trace::Action::DestroyPipelineLayout(id.0)); + } + + if let Some(lay) = hub.pipeline_layouts.unregister_locked(id.0, &mut *guard) { + self.suspected_resources + .bind_group_layouts + .extend_from_slice(&lay.bind_group_layout_ids); + self.free_resources.pipeline_layouts.push(lay.raw); + } + } + } + } + + if !self.suspected_resources.bind_group_layouts.is_empty() { + let (mut guard, _) = hub.bind_group_layouts.write(token); + + for id in self.suspected_resources.bind_group_layouts.drain(..) { + //Note: this has to happen after all the suspected pipelines are destroyed + //Note: nothing else can bump the refcount since the guard is locked exclusively + //Note: same BGL can appear multiple times in the list, but only the last + // encounter could drop the refcount to 0. 
+ if guard[id].multi_ref_count.dec_and_check_empty() { + log::debug!("Bind group layout {:?} will be destroyed", id); + #[cfg(feature = "trace")] + if let Some(t) = trace { + t.lock().add(trace::Action::DestroyBindGroupLayout(id.0)); + } + if let Some(lay) = hub.bind_group_layouts.unregister_locked(id.0, &mut *guard) { + self.free_resources.bind_group_layouts.push(lay.raw); + } + } + } + } + + if !self.suspected_resources.query_sets.is_empty() { + let (mut guard, _) = hub.query_sets.write(token); + let mut trackers = trackers.lock(); + + for id in self.suspected_resources.query_sets.drain(..) { + if trackers.query_sets.remove_abandoned(id) { + log::debug!("Query set {:?} will be destroyed", id); + // #[cfg(feature = "trace")] + // trace.map(|t| t.lock().add(trace::Action::DestroyComputePipeline(id.0))); + if let Some(res) = hub.query_sets.unregister_locked(id.0, &mut *guard) { + let submit_index = res.life_guard.life_count(); + self.active + .iter_mut() + .find(|a| a.index == submit_index) + .map_or(&mut self.free_resources, |a| &mut a.last_resources) + .query_sets + .push(res.raw); + } + } + } + } + } + + /// Determine which buffers are ready to map, and which must wait for the + /// GPU. + /// + /// See the documentation for [`LifetimeTracker`] for details. + pub(super) fn triage_mapped<G: GlobalIdentityHandlerFactory>( + &mut self, + hub: &Hub<A, G>, + token: &mut Token<super::Device<A>>, + ) { + if self.mapped.is_empty() { + return; + } + let (buffer_guard, _) = hub.buffers.read(token); + + for stored in self.mapped.drain(..) 
{ + let resource_id = stored.value; + let buf = &buffer_guard[resource_id]; + + let submit_index = buf.life_guard.life_count(); + log::trace!( + "Mapping of {:?} at submission {:?} gets assigned to active {:?}", + resource_id, + submit_index, + self.active.iter().position(|a| a.index == submit_index) + ); + + self.active + .iter_mut() + .find(|a| a.index == submit_index) + .map_or(&mut self.ready_to_map, |a| &mut a.mapped) + .push(resource_id); + } + } + + /// Map the buffers in `self.ready_to_map`. + /// + /// Return a list of mapping notifications to send. + /// + /// See the documentation for [`LifetimeTracker`] for details. + #[must_use] + pub(super) fn handle_mapping<G: GlobalIdentityHandlerFactory>( + &mut self, + hub: &Hub<A, G>, + raw: &A::Device, + trackers: &Mutex<Tracker<A>>, + token: &mut Token<super::Device<A>>, + ) -> Vec<super::BufferMapPendingClosure> { + if self.ready_to_map.is_empty() { + return Vec::new(); + } + let (mut buffer_guard, _) = hub.buffers.write(token); + let mut pending_callbacks: Vec<super::BufferMapPendingClosure> = + Vec::with_capacity(self.ready_to_map.len()); + let mut trackers = trackers.lock(); + for buffer_id in self.ready_to_map.drain(..) 
{ + let buffer = &mut buffer_guard[buffer_id]; + if buffer.life_guard.ref_count.is_none() && trackers.buffers.remove_abandoned(buffer_id) + { + buffer.map_state = resource::BufferMapState::Idle; + log::debug!("Mapping request is dropped because the buffer is destroyed."); + if let Some(buf) = hub + .buffers + .unregister_locked(buffer_id.0, &mut *buffer_guard) + { + self.free_resources.buffers.extend(buf.raw); + } + } else { + let mapping = match std::mem::replace( + &mut buffer.map_state, + resource::BufferMapState::Idle, + ) { + resource::BufferMapState::Waiting(pending_mapping) => pending_mapping, + // Mapping cancelled + resource::BufferMapState::Idle => continue, + // Mapping queued at least twice by map -> unmap -> map + // and was already successfully mapped below + active @ resource::BufferMapState::Active { .. } => { + buffer.map_state = active; + continue; + } + _ => panic!("No pending mapping."), + }; + let status = if mapping.range.start != mapping.range.end { + log::debug!("Buffer {:?} map state -> Active", buffer_id); + let host = mapping.op.host; + let size = mapping.range.end - mapping.range.start; + match super::map_buffer(raw, buffer, mapping.range.start, size, host) { + Ok(ptr) => { + buffer.map_state = resource::BufferMapState::Active { + ptr, + range: mapping.range.start..mapping.range.start + size, + host, + }; + Ok(()) + } + Err(e) => { + log::error!("Mapping failed {:?}", e); + Err(e) + } + } + } else { + buffer.map_state = resource::BufferMapState::Active { + ptr: std::ptr::NonNull::dangling(), + range: mapping.range, + host: mapping.op.host, + }; + Ok(()) + }; + pending_callbacks.push((mapping.op, status)); + } + } + pending_callbacks + } +} diff --git a/third_party/rust/wgpu-core/src/device/mod.rs b/third_party/rust/wgpu-core/src/device/mod.rs new file mode 100644 index 0000000000..955e8b4c22 --- /dev/null +++ b/third_party/rust/wgpu-core/src/device/mod.rs @@ -0,0 +1,5822 @@ +use crate::{ + binding_model, command, conv, + 
device::life::WaitIdleError, + hub::{Global, GlobalIdentityHandlerFactory, HalApi, Hub, Input, InvalidId, Storage, Token}, + id, + init_tracker::{ + BufferInitTracker, BufferInitTrackerAction, MemoryInitKind, TextureInitRange, + TextureInitTracker, TextureInitTrackerAction, + }, + instance::{self, Adapter, Surface}, + pipeline, present, + resource::{self, BufferAccessResult, BufferMapState}, + resource::{BufferAccessError, BufferMapOperation}, + track::{BindGroupStates, TextureSelector, Tracker}, + validation::{self, check_buffer_usage, check_texture_usage}, + FastHashMap, Label, LabelHelpers as _, LifeGuard, MultiRefCount, RefCount, Stored, + SubmissionIndex, DOWNLEVEL_ERROR_MESSAGE, +}; + +use arrayvec::ArrayVec; +use hal::{CommandEncoder as _, Device as _}; +use parking_lot::{Mutex, MutexGuard}; +use smallvec::SmallVec; +use thiserror::Error; +use wgt::{BufferAddress, TextureFormat, TextureViewDimension}; + +use std::{borrow::Cow, iter, mem, num::NonZeroU32, ops::Range, ptr}; + +mod life; +pub mod queue; +#[cfg(any(feature = "trace", feature = "replay"))] +pub mod trace; + +pub const SHADER_STAGE_COUNT: usize = 3; +// Should be large enough for the largest possible texture row. This +// value is enough for a 16k texture with float4 format. 
+pub(crate) const ZERO_BUFFER_SIZE: BufferAddress = 512 << 10; + +const CLEANUP_WAIT_MS: u32 = 5000; + +const IMPLICIT_FAILURE: &str = "failed implicit"; +const EP_FAILURE: &str = "EP is invalid"; + +pub type DeviceDescriptor<'a> = wgt::DeviceDescriptor<Label<'a>>; + +#[repr(C)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "trace", derive(serde::Serialize))] +#[cfg_attr(feature = "replay", derive(serde::Deserialize))] +pub enum HostMap { + Read, + Write, +} + +#[derive(Clone, Debug, Hash, PartialEq)] +#[cfg_attr(feature = "serial-pass", derive(serde::Deserialize, serde::Serialize))] +pub(crate) struct AttachmentData<T> { + pub colors: ArrayVec<Option<T>, { hal::MAX_COLOR_ATTACHMENTS }>, + pub resolves: ArrayVec<T, { hal::MAX_COLOR_ATTACHMENTS }>, + pub depth_stencil: Option<T>, +} +impl<T: PartialEq> Eq for AttachmentData<T> {} +impl<T> AttachmentData<T> { + pub(crate) fn map<U, F: Fn(&T) -> U>(&self, fun: F) -> AttachmentData<U> { + AttachmentData { + colors: self.colors.iter().map(|c| c.as_ref().map(&fun)).collect(), + resolves: self.resolves.iter().map(&fun).collect(), + depth_stencil: self.depth_stencil.as_ref().map(&fun), + } + } +} + +#[derive(Clone, Debug, Hash, PartialEq)] +#[cfg_attr(feature = "serial-pass", derive(serde::Deserialize, serde::Serialize))] +pub(crate) struct RenderPassContext { + pub attachments: AttachmentData<TextureFormat>, + pub sample_count: u32, + pub multiview: Option<NonZeroU32>, +} +#[derive(Clone, Debug, Error)] +pub enum RenderPassCompatibilityError { + #[error("Incompatible color attachment: the renderpass expected {0:?} but was given {1:?}")] + IncompatibleColorAttachment(Vec<Option<TextureFormat>>, Vec<Option<TextureFormat>>), + #[error( + "Incompatible depth-stencil attachment: the renderpass expected {0:?} but was given {1:?}" + )] + IncompatibleDepthStencilAttachment(Option<TextureFormat>, Option<TextureFormat>), + #[error("Incompatible sample count: the renderpass expected {0:?} but was given {1:?}")] 
+ IncompatibleSampleCount(u32, u32), + #[error("Incompatible multiview: the renderpass expected {0:?} but was given {1:?}")] + IncompatibleMultiview(Option<NonZeroU32>, Option<NonZeroU32>), +} + +impl RenderPassContext { + // Assumes the renderpass only contains one subpass + pub(crate) fn check_compatible( + &self, + other: &Self, + ) -> Result<(), RenderPassCompatibilityError> { + if self.attachments.colors != other.attachments.colors { + return Err(RenderPassCompatibilityError::IncompatibleColorAttachment( + self.attachments.colors.iter().cloned().collect(), + other.attachments.colors.iter().cloned().collect(), + )); + } + if self.attachments.depth_stencil != other.attachments.depth_stencil { + return Err( + RenderPassCompatibilityError::IncompatibleDepthStencilAttachment( + self.attachments.depth_stencil, + other.attachments.depth_stencil, + ), + ); + } + if self.sample_count != other.sample_count { + return Err(RenderPassCompatibilityError::IncompatibleSampleCount( + self.sample_count, + other.sample_count, + )); + } + if self.multiview != other.multiview { + return Err(RenderPassCompatibilityError::IncompatibleMultiview( + self.multiview, + other.multiview, + )); + } + Ok(()) + } +} + +pub type BufferMapPendingClosure = (BufferMapOperation, BufferAccessResult); + +#[derive(Default)] +pub struct UserClosures { + pub mappings: Vec<BufferMapPendingClosure>, + pub submissions: SmallVec<[queue::SubmittedWorkDoneClosure; 1]>, +} + +impl UserClosures { + fn extend(&mut self, other: Self) { + self.mappings.extend(other.mappings); + self.submissions.extend(other.submissions); + } + + fn fire(self) { + // Note: this logic is specifically moved out of `handle_mapping()` in order to + // have nothing locked by the time we execute users callback code. 
+ for (operation, status) in self.mappings { + operation.callback.call(status); + } + for closure in self.submissions { + closure.call(); + } + } +} + +fn map_buffer<A: hal::Api>( + raw: &A::Device, + buffer: &mut resource::Buffer<A>, + offset: BufferAddress, + size: BufferAddress, + kind: HostMap, +) -> Result<ptr::NonNull<u8>, BufferAccessError> { + let mapping = unsafe { + raw.map_buffer(buffer.raw.as_ref().unwrap(), offset..offset + size) + .map_err(DeviceError::from)? + }; + + buffer.sync_mapped_writes = match kind { + HostMap::Read if !mapping.is_coherent => unsafe { + raw.invalidate_mapped_ranges( + buffer.raw.as_ref().unwrap(), + iter::once(offset..offset + size), + ); + None + }, + HostMap::Write if !mapping.is_coherent => Some(offset..offset + size), + _ => None, + }; + + assert_eq!(offset % wgt::COPY_BUFFER_ALIGNMENT, 0); + assert_eq!(size % wgt::COPY_BUFFER_ALIGNMENT, 0); + // Zero out uninitialized parts of the mapping. (Spec dictates all resources + // behave as if they were initialized with zero) + // + // If this is a read mapping, ideally we would use a `clear_buffer` command + // before reading the data from GPU (i.e. `invalidate_range`). However, this + // would require us to kick off and wait for a command buffer or piggy back + // on an existing one (the later is likely the only worthwhile option). As + // reading uninitialized memory isn't a particular important path to + // support, we instead just initialize the memory here and make sure it is + // GPU visible, so this happens at max only once for every buffer region. + // + // If this is a write mapping zeroing out the memory here is the only + // reasonable way as all data is pushed to GPU anyways. + + // No need to flush if it is flushed later anyways. 
+ let zero_init_needs_flush_now = mapping.is_coherent && buffer.sync_mapped_writes.is_none(); + let mapped = unsafe { std::slice::from_raw_parts_mut(mapping.ptr.as_ptr(), size as usize) }; + + for uninitialized in buffer.initialization_status.drain(offset..(size + offset)) { + // The mapping's pointer is already offset, however we track the + // uninitialized range relative to the buffer's start. + let fill_range = + (uninitialized.start - offset) as usize..(uninitialized.end - offset) as usize; + mapped[fill_range].fill(0); + + if zero_init_needs_flush_now { + unsafe { + raw.flush_mapped_ranges(buffer.raw.as_ref().unwrap(), iter::once(uninitialized)) + }; + } + } + + Ok(mapping.ptr) +} + +struct CommandAllocator<A: hal::Api> { + free_encoders: Vec<A::CommandEncoder>, +} + +impl<A: hal::Api> CommandAllocator<A> { + fn acquire_encoder( + &mut self, + device: &A::Device, + queue: &A::Queue, + ) -> Result<A::CommandEncoder, hal::DeviceError> { + match self.free_encoders.pop() { + Some(encoder) => Ok(encoder), + None => unsafe { + let hal_desc = hal::CommandEncoderDescriptor { label: None, queue }; + device.create_command_encoder(&hal_desc) + }, + } + } + + fn release_encoder(&mut self, encoder: A::CommandEncoder) { + self.free_encoders.push(encoder); + } + + fn dispose(self, device: &A::Device) { + log::info!("Destroying {} command encoders", self.free_encoders.len()); + for cmd_encoder in self.free_encoders { + unsafe { + device.destroy_command_encoder(cmd_encoder); + } + } + } +} + +/// Structure describing a logical device. Some members are internally mutable, +/// stored behind mutexes. +/// +/// TODO: establish clear order of locking for these: +/// `mem_allocator`, `desc_allocator`, `life_tracker`, `trackers`, +/// `render_passes`, `pending_writes`, `trace`. +/// +/// Currently, the rules are: +/// 1. `life_tracker` is locked after `hub.devices`, enforced by the type system +/// 1. `self.trackers` is locked last (unenforced) +/// 1. 
`self.trace` is locked last (unenforced) +pub struct Device<A: HalApi> { + pub(crate) raw: A::Device, + pub(crate) adapter_id: Stored<id::AdapterId>, + pub(crate) queue: A::Queue, + pub(crate) zero_buffer: A::Buffer, + //pub(crate) cmd_allocator: command::CommandAllocator<A>, + //mem_allocator: Mutex<alloc::MemoryAllocator<A>>, + //desc_allocator: Mutex<descriptor::DescriptorAllocator<A>>, + //Note: The submission index here corresponds to the last submission that is done. + pub(crate) life_guard: LifeGuard, + + /// A clone of `life_guard.ref_count`. + /// + /// Holding a separate clone of the `RefCount` here lets us tell whether the + /// device is referenced by other resources, even if `life_guard.ref_count` + /// was set to `None` by a call to `device_drop`. + ref_count: RefCount, + + command_allocator: Mutex<CommandAllocator<A>>, + pub(crate) active_submission_index: SubmissionIndex, + fence: A::Fence, + + /// All live resources allocated with this [`Device`]. + /// + /// Has to be locked temporarily only (locked last) + pub(crate) trackers: Mutex<Tracker<A>>, + // Life tracker should be locked right after the device and before anything else. + life_tracker: Mutex<life::LifetimeTracker<A>>, + /// Temporary storage for resource management functions. Cleared at the end + /// of every call (unless an error occurs). + temp_suspected: life::SuspectedResources, + pub(crate) alignments: hal::Alignments, + pub(crate) limits: wgt::Limits, + pub(crate) features: wgt::Features, + pub(crate) downlevel: wgt::DownlevelCapabilities, + // TODO: move this behind another mutex. This would allow several methods to + // switch to borrow Device immutably, such as `write_buffer`, `write_texture`, + // and `buffer_unmap`. 
+ pending_writes: queue::PendingWrites<A>, + #[cfg(feature = "trace")] + pub(crate) trace: Option<Mutex<trace::Trace>>, +} + +#[derive(Clone, Debug, Error)] +pub enum CreateDeviceError { + #[error("not enough memory left")] + OutOfMemory, + #[error("failed to create internal buffer for initializing textures")] + FailedToCreateZeroBuffer(#[from] DeviceError), +} + +impl<A: HalApi> Device<A> { + pub(crate) fn require_features(&self, feature: wgt::Features) -> Result<(), MissingFeatures> { + if self.features.contains(feature) { + Ok(()) + } else { + Err(MissingFeatures(feature)) + } + } + + pub(crate) fn require_downlevel_flags( + &self, + flags: wgt::DownlevelFlags, + ) -> Result<(), MissingDownlevelFlags> { + if self.downlevel.flags.contains(flags) { + Ok(()) + } else { + Err(MissingDownlevelFlags(flags)) + } + } +} + +impl<A: HalApi> Device<A> { + pub(crate) fn new( + open: hal::OpenDevice<A>, + adapter_id: Stored<id::AdapterId>, + alignments: hal::Alignments, + downlevel: wgt::DownlevelCapabilities, + desc: &DeviceDescriptor, + trace_path: Option<&std::path::Path>, + ) -> Result<Self, CreateDeviceError> { + #[cfg(not(feature = "trace"))] + if let Some(_) = trace_path { + log::error!("Feature 'trace' is not enabled"); + } + let fence = + unsafe { open.device.create_fence() }.map_err(|_| CreateDeviceError::OutOfMemory)?; + + let mut com_alloc = CommandAllocator { + free_encoders: Vec::new(), + }; + let pending_encoder = com_alloc + .acquire_encoder(&open.device, &open.queue) + .map_err(|_| CreateDeviceError::OutOfMemory)?; + let mut pending_writes = queue::PendingWrites::<A>::new(pending_encoder); + + // Create zeroed buffer used for texture clears. + let zero_buffer = unsafe { + open.device + .create_buffer(&hal::BufferDescriptor { + label: Some("(wgpu internal) zero init buffer"), + size: ZERO_BUFFER_SIZE, + usage: hal::BufferUses::COPY_SRC | hal::BufferUses::COPY_DST, + memory_flags: hal::MemoryFlags::empty(), + }) + .map_err(DeviceError::from)? 
+ }; + pending_writes.activate(); + unsafe { + pending_writes + .command_encoder + .transition_buffers(iter::once(hal::BufferBarrier { + buffer: &zero_buffer, + usage: hal::BufferUses::empty()..hal::BufferUses::COPY_DST, + })); + pending_writes + .command_encoder + .clear_buffer(&zero_buffer, 0..ZERO_BUFFER_SIZE); + pending_writes + .command_encoder + .transition_buffers(iter::once(hal::BufferBarrier { + buffer: &zero_buffer, + usage: hal::BufferUses::COPY_DST..hal::BufferUses::COPY_SRC, + })); + } + + let life_guard = LifeGuard::new("<device>"); + let ref_count = life_guard.add_ref(); + Ok(Self { + raw: open.device, + adapter_id, + queue: open.queue, + zero_buffer, + life_guard, + ref_count, + command_allocator: Mutex::new(com_alloc), + active_submission_index: 0, + fence, + trackers: Mutex::new(Tracker::new()), + life_tracker: Mutex::new(life::LifetimeTracker::new()), + temp_suspected: life::SuspectedResources::default(), + #[cfg(feature = "trace")] + trace: trace_path.and_then(|path| match trace::Trace::new(path) { + Ok(mut trace) => { + trace.add(trace::Action::Init { + desc: desc.clone(), + backend: A::VARIANT, + }); + Some(Mutex::new(trace)) + } + Err(e) => { + log::error!("Unable to start a trace in '{:?}': {:?}", path, e); + None + } + }), + alignments, + limits: desc.limits.clone(), + features: desc.features, + downlevel, + pending_writes, + }) + } + + fn lock_life<'this, 'token: 'this>( + &'this self, + //TODO: fix this - the token has to be borrowed for the lock + _token: &mut Token<'token, Self>, + ) -> MutexGuard<'this, life::LifetimeTracker<A>> { + self.life_tracker.lock() + } + + /// Check this device for completed commands. + /// + /// The `maintain` argument tells how the maintence function should behave, either + /// blocking or just polling the current state of the gpu. 
+ /// + /// Return a pair `(closures, queue_empty)`, where: + /// + /// - `closures` is a list of actions to take: mapping buffers, notifying the user + /// + /// - `queue_empty` is a boolean indicating whether there are more queue + /// submissions still in flight. (We have to take the locks needed to + /// produce this information for other reasons, so we might as well just + /// return it to our callers.) + fn maintain<'this, 'token: 'this, G: GlobalIdentityHandlerFactory>( + &'this self, + hub: &Hub<A, G>, + maintain: wgt::Maintain<queue::WrappedSubmissionIndex>, + token: &mut Token<'token, Self>, + ) -> Result<(UserClosures, bool), WaitIdleError> { + profiling::scope!("Device::maintain"); + let mut life_tracker = self.lock_life(token); + + // Normally, `temp_suspected` exists only to save heap + // allocations: it's cleared at the start of the function + // call, and cleared by the end. But `Global::queue_submit` is + // fallible; if it exits early, it may leave some resources in + // `temp_suspected`. + life_tracker + .suspected_resources + .extend(&self.temp_suspected); + + life_tracker.triage_suspected( + hub, + &self.trackers, + #[cfg(feature = "trace")] + self.trace.as_ref(), + token, + ); + life_tracker.triage_mapped(hub, token); + + let last_done_index = if maintain.is_wait() { + let index_to_wait_for = match maintain { + wgt::Maintain::WaitForSubmissionIndex(submission_index) => { + // We don't need to check to see if the queue id matches + // as we already checked this from inside the poll call. + submission_index.index + } + _ => self.active_submission_index, + }; + unsafe { + self.raw + .wait(&self.fence, index_to_wait_for, CLEANUP_WAIT_MS) + .map_err(DeviceError::from)? + }; + index_to_wait_for + } else { + unsafe { + self.raw + .get_fence_value(&self.fence) + .map_err(DeviceError::from)? 
+ } + }; + + let submission_closures = + life_tracker.triage_submissions(last_done_index, &self.command_allocator); + let mapping_closures = life_tracker.handle_mapping(hub, &self.raw, &self.trackers, token); + life_tracker.cleanup(&self.raw); + + let closures = UserClosures { + mappings: mapping_closures, + submissions: submission_closures, + }; + Ok((closures, life_tracker.queue_empty())) + } + + fn untrack<'this, 'token: 'this, G: GlobalIdentityHandlerFactory>( + &'this mut self, + hub: &Hub<A, G>, + trackers: &Tracker<A>, + token: &mut Token<'token, Self>, + ) { + self.temp_suspected.clear(); + // As the tracker is cleared/dropped, we need to consider all the resources + // that it references for destruction in the next GC pass. + { + let (bind_group_guard, mut token) = hub.bind_groups.read(token); + let (compute_pipe_guard, mut token) = hub.compute_pipelines.read(&mut token); + let (render_pipe_guard, mut token) = hub.render_pipelines.read(&mut token); + let (query_set_guard, mut token) = hub.query_sets.read(&mut token); + let (buffer_guard, mut token) = hub.buffers.read(&mut token); + let (texture_guard, mut token) = hub.textures.read(&mut token); + let (texture_view_guard, mut token) = hub.texture_views.read(&mut token); + let (sampler_guard, _) = hub.samplers.read(&mut token); + + for id in trackers.buffers.used() { + if buffer_guard[id].life_guard.ref_count.is_none() { + self.temp_suspected.buffers.push(id); + } + } + for id in trackers.textures.used() { + if texture_guard[id].life_guard.ref_count.is_none() { + self.temp_suspected.textures.push(id); + } + } + for id in trackers.views.used() { + if texture_view_guard[id].life_guard.ref_count.is_none() { + self.temp_suspected.texture_views.push(id); + } + } + for id in trackers.bind_groups.used() { + if bind_group_guard[id].life_guard.ref_count.is_none() { + self.temp_suspected.bind_groups.push(id); + } + } + for id in trackers.samplers.used() { + if sampler_guard[id].life_guard.ref_count.is_none() { + 
self.temp_suspected.samplers.push(id); + } + } + for id in trackers.compute_pipelines.used() { + if compute_pipe_guard[id].life_guard.ref_count.is_none() { + self.temp_suspected.compute_pipelines.push(id); + } + } + for id in trackers.render_pipelines.used() { + if render_pipe_guard[id].life_guard.ref_count.is_none() { + self.temp_suspected.render_pipelines.push(id); + } + } + for id in trackers.query_sets.used() { + if query_set_guard[id].life_guard.ref_count.is_none() { + self.temp_suspected.query_sets.push(id); + } + } + } + + self.lock_life(token) + .suspected_resources + .extend(&self.temp_suspected); + + self.temp_suspected.clear(); + } + + fn create_buffer( + &self, + self_id: id::DeviceId, + desc: &resource::BufferDescriptor, + transient: bool, + ) -> Result<resource::Buffer<A>, resource::CreateBufferError> { + debug_assert_eq!(self_id.backend(), A::VARIANT); + + if desc.size > self.limits.max_buffer_size { + return Err(resource::CreateBufferError::MaxBufferSize { + requested: desc.size, + maximum: self.limits.max_buffer_size, + }); + } + + if desc.usage.contains(wgt::BufferUsages::INDEX) + && desc.usage.contains( + wgt::BufferUsages::VERTEX + | wgt::BufferUsages::UNIFORM + | wgt::BufferUsages::INDIRECT + | wgt::BufferUsages::STORAGE, + ) + { + self.require_downlevel_flags(wgt::DownlevelFlags::UNRESTRICTED_INDEX_BUFFER)?; + } + + let mut usage = conv::map_buffer_usage(desc.usage); + + if desc.usage.is_empty() || desc.usage.contains_invalid_bits() { + return Err(resource::CreateBufferError::InvalidUsage(desc.usage)); + } + + if !self + .features + .contains(wgt::Features::MAPPABLE_PRIMARY_BUFFERS) + { + use wgt::BufferUsages as Bu; + let write_mismatch = desc.usage.contains(Bu::MAP_WRITE) + && !(Bu::MAP_WRITE | Bu::COPY_SRC).contains(desc.usage); + let read_mismatch = desc.usage.contains(Bu::MAP_READ) + && !(Bu::MAP_READ | Bu::COPY_DST).contains(desc.usage); + if write_mismatch || read_mismatch { + return 
Err(resource::CreateBufferError::UsageMismatch(desc.usage)); + } + } + + if desc.mapped_at_creation { + if desc.size % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(resource::CreateBufferError::UnalignedSize); + } + if !desc.usage.contains(wgt::BufferUsages::MAP_WRITE) { + // we are going to be copying into it, internally + usage |= hal::BufferUses::COPY_DST; + } + } else { + // We are required to zero out (initialize) all memory. This is done + // on demand using clear_buffer which requires write transfer usage! + usage |= hal::BufferUses::COPY_DST; + } + + let actual_size = if desc.size == 0 { + wgt::COPY_BUFFER_ALIGNMENT + } else if desc.usage.contains(wgt::BufferUsages::VERTEX) { + // Bumping the size by 1 so that we can bind an empty range at the + // end of the buffer. + desc.size + 1 + } else { + desc.size + }; + let clear_remainder = actual_size % wgt::COPY_BUFFER_ALIGNMENT; + let aligned_size = if clear_remainder != 0 { + actual_size + wgt::COPY_BUFFER_ALIGNMENT - clear_remainder + } else { + actual_size + }; + + let mut memory_flags = hal::MemoryFlags::empty(); + memory_flags.set(hal::MemoryFlags::TRANSIENT, transient); + + let hal_desc = hal::BufferDescriptor { + label: desc.label.borrow_option(), + size: aligned_size, + usage, + memory_flags, + }; + let buffer = unsafe { self.raw.create_buffer(&hal_desc) }.map_err(DeviceError::from)?; + + Ok(resource::Buffer { + raw: Some(buffer), + device_id: Stored { + value: id::Valid(self_id), + ref_count: self.life_guard.add_ref(), + }, + usage: desc.usage, + size: desc.size, + initialization_status: BufferInitTracker::new(desc.size), + sync_mapped_writes: None, + map_state: resource::BufferMapState::Idle, + life_guard: LifeGuard::new(desc.label.borrow_or_default()), + }) + } + + fn create_texture_from_hal( + &self, + hal_texture: A::Texture, + hal_usage: hal::TextureUses, + self_id: id::DeviceId, + desc: &resource::TextureDescriptor, + format_features: wgt::TextureFormatFeatures, + clear_mode: 
resource::TextureClearMode<A>, + ) -> resource::Texture<A> { + debug_assert_eq!(self_id.backend(), A::VARIANT); + + resource::Texture { + inner: resource::TextureInner::Native { + raw: Some(hal_texture), + }, + device_id: Stored { + value: id::Valid(self_id), + ref_count: self.life_guard.add_ref(), + }, + desc: desc.map_label(|_| ()), + hal_usage, + format_features, + initialization_status: TextureInitTracker::new( + desc.mip_level_count, + desc.array_layer_count(), + ), + full_range: TextureSelector { + mips: 0..desc.mip_level_count, + layers: 0..desc.array_layer_count(), + }, + life_guard: LifeGuard::new(desc.label.borrow_or_default()), + clear_mode, + } + } + + fn create_texture( + &self, + self_id: id::DeviceId, + adapter: &Adapter<A>, + desc: &resource::TextureDescriptor, + ) -> Result<resource::Texture<A>, resource::CreateTextureError> { + use resource::{CreateTextureError, TextureDimensionError}; + + if desc.usage.is_empty() || desc.usage.contains_invalid_bits() { + return Err(CreateTextureError::InvalidUsage(desc.usage)); + } + + conv::check_texture_dimension_size( + desc.dimension, + desc.size, + desc.sample_count, + &self.limits, + )?; + + let format_desc = desc.format.describe(); + + if desc.dimension != wgt::TextureDimension::D2 { + // Depth textures can only be 2D + if format_desc.sample_type == wgt::TextureSampleType::Depth { + return Err(CreateTextureError::InvalidDepthDimension( + desc.dimension, + desc.format, + )); + } + // Renderable textures can only be 2D + if desc.usage.contains(wgt::TextureUsages::RENDER_ATTACHMENT) { + return Err(CreateTextureError::InvalidDimensionUsages( + wgt::TextureUsages::RENDER_ATTACHMENT, + desc.dimension, + )); + } + + // Compressed textures can only be 2D + if format_desc.is_compressed() { + return Err(CreateTextureError::InvalidCompressedDimension( + desc.dimension, + desc.format, + )); + } + } + + if format_desc.is_compressed() { + let block_width = format_desc.block_dimensions.0 as u32; + let block_height = 
format_desc.block_dimensions.1 as u32; + + if desc.size.width % block_width != 0 { + return Err(CreateTextureError::InvalidDimension( + TextureDimensionError::NotMultipleOfBlockWidth { + width: desc.size.width, + block_width, + format: desc.format, + }, + )); + } + + if desc.size.height % block_height != 0 { + return Err(CreateTextureError::InvalidDimension( + TextureDimensionError::NotMultipleOfBlockHeight { + height: desc.size.height, + block_height, + format: desc.format, + }, + )); + } + } + + let format_features = self + .describe_format_features(adapter, desc.format) + .map_err(|error| CreateTextureError::MissingFeatures(desc.format, error))?; + + if desc.sample_count > 1 { + if desc.mip_level_count != 1 { + return Err(CreateTextureError::InvalidMipLevelCount { + requested: desc.mip_level_count, + maximum: 1, + }); + } + + if desc.size.depth_or_array_layers != 1 { + return Err(CreateTextureError::InvalidDimension( + TextureDimensionError::MultisampledDepthOrArrayLayer( + desc.size.depth_or_array_layers, + ), + )); + } + + if desc.usage.contains(wgt::TextureUsages::STORAGE_BINDING) { + return Err(CreateTextureError::InvalidMultisampledStorageBinding); + } + + if !desc.usage.contains(wgt::TextureUsages::RENDER_ATTACHMENT) { + return Err(CreateTextureError::MultisampledNotRenderAttachment); + } + + if !format_features.flags.intersects( + wgt::TextureFormatFeatureFlags::MULTISAMPLE_X4 + | wgt::TextureFormatFeatureFlags::MULTISAMPLE_X2 + | wgt::TextureFormatFeatureFlags::MULTISAMPLE_X8, + ) { + return Err(CreateTextureError::InvalidMultisampledFormat(desc.format)); + } + + if !format_features + .flags + .sample_count_supported(desc.sample_count) + { + return Err(CreateTextureError::InvalidSampleCount( + desc.sample_count, + desc.format, + )); + }; + } + + let mips = desc.mip_level_count; + let max_levels_allowed = desc.size.max_mips(desc.dimension).min(hal::MAX_MIP_LEVELS); + if mips == 0 || mips > max_levels_allowed { + return 
Err(CreateTextureError::InvalidMipLevelCount { + requested: mips, + maximum: max_levels_allowed, + }); + } + + let missing_allowed_usages = desc.usage - format_features.allowed_usages; + if !missing_allowed_usages.is_empty() { + // detect downlevel incompatibilities + let wgpu_allowed_usages = desc + .format + .describe() + .guaranteed_format_features + .allowed_usages; + let wgpu_missing_usages = desc.usage - wgpu_allowed_usages; + return Err(CreateTextureError::InvalidFormatUsages( + missing_allowed_usages, + desc.format, + wgpu_missing_usages.is_empty(), + )); + } + + // TODO: validate missing TextureDescriptor::view_formats. + + // Enforce having COPY_DST/DEPTH_STENCIL_WRIT/COLOR_TARGET otherwise we + // wouldn't be able to initialize the texture. + let hal_usage = conv::map_texture_usage(desc.usage, desc.format.into()) + | if format_desc.sample_type == wgt::TextureSampleType::Depth { + hal::TextureUses::DEPTH_STENCIL_WRITE + } else if desc.usage.contains(wgt::TextureUsages::COPY_DST) { + hal::TextureUses::COPY_DST // (set already) + } else { + // Use COPY_DST only if we can't use COLOR_TARGET + if format_features + .allowed_usages + .contains(wgt::TextureUsages::RENDER_ATTACHMENT) + && desc.dimension == wgt::TextureDimension::D2 + // Render targets dimension must be 2d + { + hal::TextureUses::COLOR_TARGET + } else { + hal::TextureUses::COPY_DST + } + }; + + let hal_desc = hal::TextureDescriptor { + label: desc.label.borrow_option(), + size: desc.size, + mip_level_count: desc.mip_level_count, + sample_count: desc.sample_count, + dimension: desc.dimension, + format: desc.format, + usage: hal_usage, + memory_flags: hal::MemoryFlags::empty(), + }; + + let raw_texture = unsafe { + self.raw + .create_texture(&hal_desc) + .map_err(DeviceError::from)? 
+ }; + + let clear_mode = if hal_usage + .intersects(hal::TextureUses::DEPTH_STENCIL_WRITE | hal::TextureUses::COLOR_TARGET) + { + let (is_color, usage) = + if desc.format.describe().sample_type == wgt::TextureSampleType::Depth { + (false, hal::TextureUses::DEPTH_STENCIL_WRITE) + } else { + (true, hal::TextureUses::COLOR_TARGET) + }; + let dimension = match desc.dimension { + wgt::TextureDimension::D1 => wgt::TextureViewDimension::D1, + wgt::TextureDimension::D2 => wgt::TextureViewDimension::D2, + wgt::TextureDimension::D3 => unreachable!(), + }; + + let mut clear_views = SmallVec::new(); + for mip_level in 0..desc.mip_level_count { + for array_layer in 0..desc.size.depth_or_array_layers { + let desc = hal::TextureViewDescriptor { + label: Some("(wgpu internal) clear texture view"), + format: desc.format, + dimension, + usage, + range: wgt::ImageSubresourceRange { + aspect: wgt::TextureAspect::All, + base_mip_level: mip_level, + mip_level_count: NonZeroU32::new(1), + base_array_layer: array_layer, + array_layer_count: NonZeroU32::new(1), + }, + }; + clear_views.push( + unsafe { self.raw.create_texture_view(&raw_texture, &desc) } + .map_err(DeviceError::from)?, + ); + } + } + resource::TextureClearMode::RenderPass { + clear_views, + is_color, + } + } else { + resource::TextureClearMode::BufferCopy + }; + + let mut texture = self.create_texture_from_hal( + raw_texture, + hal_usage, + self_id, + desc, + format_features, + clear_mode, + ); + texture.hal_usage = hal_usage; + Ok(texture) + } + + fn create_texture_view( + &self, + texture: &resource::Texture<A>, + texture_id: id::TextureId, + desc: &resource::TextureViewDescriptor, + ) -> Result<resource::TextureView<A>, resource::CreateTextureViewError> { + let texture_raw = texture + .inner + .as_raw() + .ok_or(resource::CreateTextureViewError::InvalidTexture)?; + + let view_dim = match desc.dimension { + Some(dim) => { + // check if the dimension is compatible with the texture + if texture.desc.dimension != 
dim.compatible_texture_dimension() { + return Err( + resource::CreateTextureViewError::InvalidTextureViewDimension { + view: dim, + texture: texture.desc.dimension, + }, + ); + } + // check if multisampled texture is seen as anything but 2D + match dim { + wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array => {} + _ if texture.desc.sample_count > 1 => { + return Err(resource::CreateTextureViewError::InvalidMultisampledTextureViewDimension(dim)); + } + _ => {} + } + dim + } + None => match texture.desc.dimension { + wgt::TextureDimension::D1 => wgt::TextureViewDimension::D1, + wgt::TextureDimension::D2 if texture.desc.size.depth_or_array_layers > 1 => { + wgt::TextureViewDimension::D2Array + } + wgt::TextureDimension::D2 => wgt::TextureViewDimension::D2, + wgt::TextureDimension::D3 => wgt::TextureViewDimension::D3, + }, + }; + + let mip_count = desc.range.mip_level_count.map_or(1, |count| count.get()); + let required_level_count = desc.range.base_mip_level.saturating_add(mip_count); + + let required_layer_count = match desc.range.array_layer_count { + Some(count) => desc.range.base_array_layer.saturating_add(count.get()), + None => match view_dim { + wgt::TextureViewDimension::D1 + | wgt::TextureViewDimension::D2 + | wgt::TextureViewDimension::D3 => 1, + wgt::TextureViewDimension::Cube => 6, + _ => texture.desc.array_layer_count(), + } + .max(desc.range.base_array_layer.saturating_add(1)), + }; + + let level_end = texture.full_range.mips.end; + let layer_end = texture.full_range.layers.end; + if required_level_count > level_end { + return Err(resource::CreateTextureViewError::TooManyMipLevels { + requested: required_level_count, + total: level_end, + }); + } + if required_layer_count > layer_end { + return Err(resource::CreateTextureViewError::TooManyArrayLayers { + requested: required_layer_count, + total: layer_end, + }); + }; + + match view_dim { + TextureViewDimension::Cube if required_layer_count != 6 => { + return Err( + 
resource::CreateTextureViewError::InvalidCubemapTextureDepth { + depth: required_layer_count, + }, + ) + } + TextureViewDimension::CubeArray if required_layer_count % 6 != 0 => { + return Err( + resource::CreateTextureViewError::InvalidCubemapArrayTextureDepth { + depth: required_layer_count, + }, + ) + } + _ => {} + } + + let full_aspect = hal::FormatAspects::from(texture.desc.format); + let select_aspect = hal::FormatAspects::from(desc.range.aspect); + if (full_aspect & select_aspect).is_empty() { + return Err(resource::CreateTextureViewError::InvalidAspect { + texture_format: texture.desc.format, + requested_aspect: desc.range.aspect, + }); + } + + let end_level = desc + .range + .mip_level_count + .map_or(level_end, |_| required_level_count); + let end_layer = desc + .range + .array_layer_count + .map_or(layer_end, |_| required_layer_count); + let selector = TextureSelector { + mips: desc.range.base_mip_level..end_level, + layers: desc.range.base_array_layer..end_layer, + }; + + let view_layer_count = selector.layers.end - selector.layers.start; + let layer_check_ok = match view_dim { + wgt::TextureViewDimension::D1 + | wgt::TextureViewDimension::D2 + | wgt::TextureViewDimension::D3 => view_layer_count == 1, + wgt::TextureViewDimension::D2Array => true, + wgt::TextureViewDimension::Cube => view_layer_count == 6, + wgt::TextureViewDimension::CubeArray => view_layer_count % 6 == 0, + }; + if !layer_check_ok { + return Err(resource::CreateTextureViewError::InvalidArrayLayerCount { + requested: view_layer_count, + dim: view_dim, + }); + } + + let mut extent = texture + .desc + .mip_level_size(desc.range.base_mip_level) + .unwrap(); + if view_dim != wgt::TextureViewDimension::D3 { + extent.depth_or_array_layers = view_layer_count; + } + let format = desc.format.unwrap_or(texture.desc.format); + if format != texture.desc.format { + return Err(resource::CreateTextureViewError::FormatReinterpretation { + texture: texture.desc.format, + view: format, + }); + } + + // 
filter the usages based on the other criteria + let usage = { + let mask_copy = !(hal::TextureUses::COPY_SRC | hal::TextureUses::COPY_DST); + let mask_dimension = match view_dim { + wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { + hal::TextureUses::RESOURCE + } + wgt::TextureViewDimension::D3 => { + hal::TextureUses::RESOURCE + | hal::TextureUses::STORAGE_READ + | hal::TextureUses::STORAGE_READ_WRITE + } + _ => hal::TextureUses::all(), + }; + let mask_mip_level = if selector.mips.end - selector.mips.start != 1 { + hal::TextureUses::RESOURCE + } else { + hal::TextureUses::all() + }; + texture.hal_usage & mask_copy & mask_dimension & mask_mip_level + }; + + log::debug!( + "Create view for texture {:?} filters usages to {:?}", + texture_id, + usage + ); + let hal_desc = hal::TextureViewDescriptor { + label: desc.label.borrow_option(), + format, + dimension: view_dim, + usage, + range: desc.range.clone(), + }; + + let raw = unsafe { + self.raw + .create_texture_view(texture_raw, &hal_desc) + .map_err(|_| resource::CreateTextureViewError::OutOfMemory)? 
};

        Ok(resource::TextureView {
            raw,
            parent_id: Stored {
                value: id::Valid(texture_id),
                ref_count: texture.life_guard.add_ref(),
            },
            device_id: texture.device_id.clone(),
            desc: resource::HalTextureViewDescriptor {
                format: hal_desc.format,
                dimension: hal_desc.dimension,
                range: hal_desc.range,
            },
            format_features: texture.format_features,
            extent,
            samples: texture.desc.sample_count,
            selector,
            life_guard: LifeGuard::new(desc.label.borrow_or_default()),
        })
    }

    /// Validates `desc` against the device's features and creates a sampler.
    ///
    /// Returns a missing-features error when a border address mode or the
    /// zero border color is requested without the matching feature, and
    /// `InvalidClamp` when the anisotropy clamp exceeds `hal::MAX_ANISOTROPY`
    /// or is not a power of two. HAL failures surface as device errors.
    fn create_sampler(
        &self,
        self_id: id::DeviceId,
        desc: &resource::SamplerDescriptor,
    ) -> Result<resource::Sampler<A>, resource::CreateSamplerError> {
        let clamps_to_border = desc
            .address_modes
            .contains(&wgt::AddressMode::ClampToBorder);
        if clamps_to_border {
            self.require_features(wgt::Features::ADDRESS_MODE_CLAMP_TO_BORDER)?;
        }

        if let Some(wgt::SamplerBorderColor::Zero) = desc.border_color {
            self.require_features(wgt::Features::ADDRESS_MODE_CLAMP_TO_ZERO)?;
        }

        // Only pass an explicit LOD range to the HAL when it narrows 0..32.
        let narrows_lod = desc.lod_min_clamp > 0.0 || desc.lod_max_clamp < 32.0;
        let lod_clamp = match narrows_lod {
            true => Some(desc.lod_min_clamp..desc.lod_max_clamp),
            false => None,
        };

        let anisotropy_clamp = match desc.anisotropy_clamp {
            None => None,
            Some(requested) => {
                let level = requested.get();
                if level > hal::MAX_ANISOTROPY || !conv::is_power_of_two_u32(level as u32) {
                    return Err(resource::CreateSamplerError::InvalidClamp(level));
                }
                // A valid clamp is dropped, not rejected, when the device
                // does not advertise anisotropic filtering.
                let supported = self
                    .downlevel
                    .flags
                    .contains(wgt::DownlevelFlags::ANISOTROPIC_FILTERING);
                if supported {
                    std::num::NonZeroU8::new(level)
                } else {
                    None
                }
            }
        };

        //TODO: check for wgt::DownlevelFlags::COMPARISON_SAMPLERS

        let hal_desc = hal::SamplerDescriptor {
            label: desc.label.borrow_option(),
            address_modes: desc.address_modes,
            mag_filter: desc.mag_filter,
            min_filter: desc.min_filter,
            mipmap_filter: desc.mipmap_filter,
            lod_clamp,
            compare: desc.compare,
            anisotropy_clamp,
            border_color: desc.border_color,
        };

        let raw = unsafe { self.raw.create_sampler(&hal_desc) }.map_err(DeviceError::from)?;

        // The sampler counts as filtering if either min or mag is linear.
        let is_filtering = desc.min_filter == wgt::FilterMode::Linear
            || desc.mag_filter == wgt::FilterMode::Linear;
        let device_id = Stored {
            value: id::Valid(self_id),
            ref_count: self.life_guard.add_ref(),
        };
        Ok(resource::Sampler {
            raw,
            device_id,
            life_guard: LifeGuard::new(desc.label.borrow_or_default()),
            comparison: desc.compare.is_some(),
            filtering: is_filtering,
        })
    }

    fn create_shader_module<'a>(
        &self,
        self_id: id::DeviceId,
        desc: &pipeline::ShaderModuleDescriptor<'a>,
        source: pipeline::ShaderModuleSource<'a>,
    ) -> Result<pipeline::ShaderModule<A>, pipeline::CreateShaderModuleError> {
        let (module, source) = match source {
            #[cfg(feature = "wgsl")]
            pipeline::ShaderModuleSource::Wgsl(code) => {
                profiling::scope!("naga::wgsl::parse_str");
                let module = naga::front::wgsl::parse_str(&code).map_err(|inner| {
                    pipeline::CreateShaderModuleError::Parsing(pipeline::ShaderError {
                        source: code.to_string(),
                        label: desc.label.as_ref().map(|l| l.to_string()),
                        inner: Box::new(inner),
                    })
                })?;
                (Cow::Owned(module), code.into_owned())
            }
            pipeline::ShaderModuleSource::Naga(module) => (module, String::new()),
            pipeline::ShaderModuleSource::Dummy(_) => panic!("found `ShaderModuleSource::Dummy`"),
        };
        for (_, var) in module.global_variables.iter() {
            match var.binding {
                Some(ref br) if br.group >= self.limits.max_bind_groups => {
                    return Err(pipeline::CreateShaderModuleError::InvalidGroupIndex {
                        bind: br.clone(),
                        group: br.group,
                        limit: self.limits.max_bind_groups,
                    });
                }
                _ => continue,
            };
        }

        use naga::valid::Capabilities as Caps;
        profiling::scope!("naga::validate");

        let mut caps = Caps::empty();
        caps.set(
            Caps::PUSH_CONSTANT,
            self.features.contains(wgt::Features::PUSH_CONSTANTS),
        );
        caps.set(
            Caps::FLOAT64,
            self.features.contains(wgt::Features::SHADER_FLOAT64),
        );
        caps.set(
            Caps::PRIMITIVE_INDEX,
            self.features
                .contains(wgt::Features::SHADER_PRIMITIVE_INDEX),
); + caps.set( + Caps::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, + self.features.contains( + wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, + ), + ); + caps.set( + Caps::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING, + self.features.contains( + wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING, + ), + ); + // TODO: This needs a proper wgpu feature + caps.set( + Caps::SAMPLER_NON_UNIFORM_INDEXING, + self.features.contains( + wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, + ), + ); + let info = naga::valid::Validator::new(naga::valid::ValidationFlags::all(), caps) + .validate(&module) + .map_err(|inner| { + pipeline::CreateShaderModuleError::Validation(pipeline::ShaderError { + source, + label: desc.label.as_ref().map(|l| l.to_string()), + inner: Box::new(inner), + }) + })?; + let interface = + validation::Interface::new(&module, &info, self.features, self.limits.clone()); + let hal_shader = hal::ShaderInput::Naga(hal::NagaShader { module, info }); + + let hal_desc = hal::ShaderModuleDescriptor { + label: desc.label.borrow_option(), + runtime_checks: desc.shader_bound_checks.runtime_checks(), + }; + let raw = match unsafe { self.raw.create_shader_module(&hal_desc, hal_shader) } { + Ok(raw) => raw, + Err(error) => { + return Err(match error { + hal::ShaderError::Device(error) => { + pipeline::CreateShaderModuleError::Device(error.into()) + } + hal::ShaderError::Compilation(ref msg) => { + log::error!("Shader error: {}", msg); + pipeline::CreateShaderModuleError::Generation + } + }) + } + }; + + Ok(pipeline::ShaderModule { + raw, + device_id: Stored { + value: id::Valid(self_id), + ref_count: self.life_guard.add_ref(), + }, + interface: Some(interface), + #[cfg(debug_assertions)] + label: desc.label.borrow_or_default().to_string(), + }) + } + + #[allow(unused_unsafe)] + unsafe fn create_shader_module_spirv<'a>( + &self, + self_id: id::DeviceId, 
desc: &pipeline::ShaderModuleDescriptor<'a>,
        source: &'a [u32],
    ) -> Result<pipeline::ShaderModule<A>, pipeline::CreateShaderModuleError> {
        self.require_features(wgt::Features::SPIRV_SHADER_PASSTHROUGH)?;
        let hal_desc = hal::ShaderModuleDescriptor {
            label: desc.label.borrow_option(),
            runtime_checks: desc.shader_bound_checks.runtime_checks(),
        };
        let hal_shader = hal::ShaderInput::SpirV(source);
        let raw = match unsafe { self.raw.create_shader_module(&hal_desc, hal_shader) } {
            Ok(raw) => raw,
            Err(error) => {
                return Err(match error {
                    hal::ShaderError::Device(error) => {
                        pipeline::CreateShaderModuleError::Device(error.into())
                    }
                    hal::ShaderError::Compilation(ref msg) => {
                        log::error!("Shader error: {}", msg);
                        pipeline::CreateShaderModuleError::Generation
                    }
                })
            }
        };

        Ok(pipeline::ShaderModule {
            raw,
            device_id: Stored {
                value: id::Valid(self_id),
                ref_count: self.life_guard.add_ref(),
            },
            interface: None,
            #[cfg(debug_assertions)]
            label: desc.label.borrow_or_default().to_string(),
        })
    }

    /// Looks for an existing bind group layout on device `self_id` whose
    /// entries equal `entry_map`.
    ///
    /// On a hit, the layout's `multi_ref_count` is incremented and its id is
    /// returned; otherwise returns `None` and nothing is modified.
    fn deduplicate_bind_group_layout(
        self_id: id::DeviceId,
        entry_map: &binding_model::BindEntryMap,
        guard: &Storage<binding_model::BindGroupLayout<A>, id::BindGroupLayoutId>,
    ) -> Option<id::BindGroupLayoutId> {
        let (existing_id, existing_layout) = guard
            .iter(self_id.backend())
            .find(|&(_, candidate)| {
                candidate.device_id.value.0 == self_id && candidate.entries == *entry_map
            })?;
        // The caller becomes another user of the shared layout.
        existing_layout.multi_ref_count.inc();
        Some(existing_id)
    }

    /// Collects the entry maps of every bind group layout referenced by
    /// `pipeline_layout`, in the pipeline layout's own order.
    fn get_introspection_bind_group_layouts<'a>(
        pipeline_layout: &binding_model::PipelineLayout<A>,
        bgl_guard: &'a Storage<binding_model::BindGroupLayout<A>, id::BindGroupLayoutId>,
    ) -> ArrayVec<&'a binding_model::BindEntryMap, { hal::MAX_BIND_GROUPS }> {
        let mut entry_maps = ArrayVec::new();
        for &bgl_id in pipeline_layout.bind_group_layout_ids.iter() {
            entry_maps.push(&bgl_guard[bgl_id].entries);
        }
        entry_maps
    }

    /// Generate information about late-validated buffer bindings for pipelines.
    //TODO: should this be combined with `get_introspection_bind_group_layouts` in some way?
    fn make_late_sized_buffer_groups<'a>(
        shader_binding_sizes: &FastHashMap<naga::ResourceBinding, wgt::BufferSize>,
        layout: &binding_model::PipelineLayout<A>,
        bgl_guard: &'a Storage<binding_model::BindGroupLayout<A>, id::BindGroupLayoutId>,
    ) -> ArrayVec<pipeline::LateSizedBufferGroup, { hal::MAX_BIND_GROUPS }> {
        // For each group of the pipeline layout, in layout order, keep only
        // the buffer entries that declare no `min_binding_size` and pair each
        // with the size the shader requires (0 when the shader has no entry
        // for that binding).
        layout
            .bind_group_layout_ids
            .iter()
            .enumerate()
            .map(|(group_index, &bgl_id)| {
                let late_sizes = bgl_guard[bgl_id].entries.values().filter_map(|entry| {
                    if let wgt::BindingType::Buffer {
                        min_binding_size: None,
                        ..
                    } = entry.ty
                    {
                        let key = naga::ResourceBinding {
                            group: group_index as u32,
                            binding: entry.binding,
                        };
                        Some(shader_binding_sizes.get(&key).map_or(0, |size| size.get()))
                    } else {
                        None
                    }
                });
                pipeline::LateSizedBufferGroup {
                    shader_sizes: late_sizes.collect(),
                }
            })
            .collect()
    }

    fn create_bind_group_layout(
        &self,
        self_id: id::DeviceId,
        label: Option<&str>,
        entry_map: binding_model::BindEntryMap,
    ) -> Result<binding_model::BindGroupLayout<A>, binding_model::CreateBindGroupLayoutError> {
        #[derive(PartialEq)]
        enum WritableStorage {
            Yes,
            No,
        }

        for entry in entry_map.values() {
            use wgt::BindingType as Bt;

            let mut required_features = wgt::Features::empty();
            let mut required_downlevel_flags = wgt::DownlevelFlags::empty();
            let (array_feature, writable_storage) = match entry.ty {
                Bt::Buffer {
                    ty: wgt::BufferBindingType::Uniform,
                    has_dynamic_offset: false,
                    min_binding_size: _,
                } => (
                    Some(wgt::Features::BUFFER_BINDING_ARRAY),
                    WritableStorage::No,
                ),
                Bt::Buffer {
                    ty: wgt::BufferBindingType::Uniform,
                    has_dynamic_offset: true,
                    min_binding_size: _,
                } => (
Some(wgt::Features::BUFFER_BINDING_ARRAY), + WritableStorage::No, + ), + Bt::Buffer { + ty: wgt::BufferBindingType::Storage { read_only }, + .. + } => ( + Some( + wgt::Features::BUFFER_BINDING_ARRAY + | wgt::Features::STORAGE_RESOURCE_BINDING_ARRAY, + ), + match read_only { + true => WritableStorage::No, + false => WritableStorage::Yes, + }, + ), + Bt::Sampler { .. } => ( + Some(wgt::Features::TEXTURE_BINDING_ARRAY), + WritableStorage::No, + ), + Bt::Texture { .. } => ( + Some(wgt::Features::TEXTURE_BINDING_ARRAY), + WritableStorage::No, + ), + Bt::StorageTexture { + access, + view_dimension, + format: _, + } => { + match view_dimension { + wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { + return Err(binding_model::CreateBindGroupLayoutError::Entry { + binding: entry.binding, + error: binding_model::BindGroupLayoutEntryError::StorageTextureCube, + }) + } + _ => (), + } + match access { + wgt::StorageTextureAccess::ReadOnly + | wgt::StorageTextureAccess::ReadWrite + if !self.features.contains( + wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES, + ) => + { + return Err(binding_model::CreateBindGroupLayoutError::Entry { + binding: entry.binding, + error: binding_model::BindGroupLayoutEntryError::StorageTextureReadWrite, + }); + } + _ => (), + } + ( + Some( + wgt::Features::TEXTURE_BINDING_ARRAY + | wgt::Features::STORAGE_RESOURCE_BINDING_ARRAY, + ), + match access { + wgt::StorageTextureAccess::WriteOnly => WritableStorage::Yes, + wgt::StorageTextureAccess::ReadOnly => { + required_features |= + wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES; + WritableStorage::No + } + wgt::StorageTextureAccess::ReadWrite => { + required_features |= + wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES; + WritableStorage::Yes + } + }, + ) + } + }; + + // Validate the count parameter + if entry.count.is_some() { + required_features |= array_feature + .ok_or(binding_model::BindGroupLayoutEntryError::ArrayUnsupported) + .map_err(|error| 
binding_model::CreateBindGroupLayoutError::Entry { + binding: entry.binding, + error, + })?; + } + + if entry.visibility.contains_invalid_bits() { + return Err( + binding_model::CreateBindGroupLayoutError::InvalidVisibility(entry.visibility), + ); + } + + if entry.visibility.contains(wgt::ShaderStages::VERTEX) { + if writable_storage == WritableStorage::Yes { + required_features |= wgt::Features::VERTEX_WRITABLE_STORAGE; + } + if let Bt::Buffer { + ty: wgt::BufferBindingType::Storage { .. }, + .. + } = entry.ty + { + required_downlevel_flags |= wgt::DownlevelFlags::VERTEX_STORAGE; + } + } + if writable_storage == WritableStorage::Yes + && entry.visibility.contains(wgt::ShaderStages::FRAGMENT) + { + required_downlevel_flags |= wgt::DownlevelFlags::FRAGMENT_WRITABLE_STORAGE; + } + + self.require_features(required_features) + .map_err(binding_model::BindGroupLayoutEntryError::MissingFeatures) + .map_err(|error| binding_model::CreateBindGroupLayoutError::Entry { + binding: entry.binding, + error, + })?; + self.require_downlevel_flags(required_downlevel_flags) + .map_err(binding_model::BindGroupLayoutEntryError::MissingDownlevelFlags) + .map_err(|error| binding_model::CreateBindGroupLayoutError::Entry { + binding: entry.binding, + error, + })?; + } + + let bgl_flags = conv::bind_group_layout_flags(self.features); + + let mut hal_bindings = entry_map.values().cloned().collect::<Vec<_>>(); + hal_bindings.sort_by_key(|b| b.binding); + let hal_desc = hal::BindGroupLayoutDescriptor { + label, + flags: bgl_flags, + entries: &hal_bindings, + }; + let raw = unsafe { + self.raw + .create_bind_group_layout(&hal_desc) + .map_err(DeviceError::from)? + }; + + let mut count_validator = binding_model::BindingTypeMaxCountValidator::default(); + for entry in entry_map.values() { + count_validator.add_binding(entry); + } + // If a single bind group layout violates limits, the pipeline layout is + // definitely going to violate limits too, lets catch it now. 
count_validator
            .validate(&self.limits)
            .map_err(binding_model::CreateBindGroupLayoutError::TooManyBindings)?;

        Ok(binding_model::BindGroupLayout {
            raw,
            device_id: Stored {
                value: id::Valid(self_id),
                ref_count: self.life_guard.add_ref(),
            },
            multi_ref_count: MultiRefCount::new(),
            dynamic_count: entry_map
                .values()
                .filter(|b| b.ty.has_dynamic_offset())
                .count(),
            count_validator,
            entries: entry_map,
            #[cfg(debug_assertions)]
            label: label.unwrap_or("").to_string(),
        })
    }

    /// Validates one buffer binding against its layout declaration `decl`
    /// and builds the HAL binding for it.
    ///
    /// On success this also:
    /// - registers the buffer in `used` with the internal usage implied by
    ///   the binding type,
    /// - pushes dynamic-offset bookkeeping to `dynamic_binding_info` when the
    ///   declaration has a dynamic offset,
    /// - records the bound size in `late_buffer_binding_sizes` when the
    ///   declaration has no `min_binding_size`,
    /// - appends the memory-initialization action for the bound range to
    ///   `used_buffer_ranges`.
    ///
    /// # Errors
    ///
    /// Returns `WrongBindingType` if `decl` is not a buffer binding,
    /// `UnalignedBufferOffset` for a misaligned offset, `InvalidBuffer` for an
    /// unknown or destroyed buffer, a usage error if the buffer lacks the
    /// required public usage, `BindingRangeTooLarge` if the requested range
    /// does not fit inside the buffer (including when `offset + size` would
    /// overflow), `BufferRangeTooLarge` if the range exceeds the device limit,
    /// and `BindingSizeTooSmall` / `BindingZeroSize` for size violations.
    ///
    /// # Panics
    ///
    /// Panics if `bb.offset` is not a multiple of `COPY_BUFFER_ALIGNMENT`
    /// after passing the binding-type alignment check.
    fn create_buffer_binding<'a>(
        bb: &binding_model::BufferBinding,
        binding: u32,
        decl: &wgt::BindGroupLayoutEntry,
        used_buffer_ranges: &mut Vec<BufferInitTrackerAction>,
        dynamic_binding_info: &mut Vec<binding_model::BindGroupDynamicBindingData>,
        late_buffer_binding_sizes: &mut FastHashMap<u32, wgt::BufferSize>,
        used: &mut BindGroupStates<A>,
        storage: &'a Storage<resource::Buffer<A>, id::BufferId>,
        limits: &wgt::Limits,
    ) -> Result<hal::BufferBinding<'a, A>, binding_model::CreateBindGroupError> {
        use crate::binding_model::CreateBindGroupError as Error;

        let (binding_ty, dynamic, min_size) = match decl.ty {
            wgt::BindingType::Buffer {
                ty,
                has_dynamic_offset,
                min_binding_size,
            } => (ty, has_dynamic_offset, min_binding_size),
            _ => {
                return Err(Error::WrongBindingType {
                    binding,
                    actual: decl.ty,
                    expected: "UniformBuffer, StorageBuffer or ReadonlyStorageBuffer",
                })
            }
        };
        let (pub_usage, internal_use, range_limit) = match binding_ty {
            wgt::BufferBindingType::Uniform => (
                wgt::BufferUsages::UNIFORM,
                hal::BufferUses::UNIFORM,
                limits.max_uniform_buffer_binding_size,
            ),
            wgt::BufferBindingType::Storage { read_only } => (
                wgt::BufferUsages::STORAGE,
                if read_only {
                    hal::BufferUses::STORAGE_READ
                } else {
                    hal::BufferUses::STORAGE_READ_WRITE
                },
                limits.max_storage_buffer_binding_size,
            ),
        };

        let (align, align_limit_name) =
            binding_model::buffer_binding_type_alignment(limits, binding_ty);
        if bb.offset % align as u64 != 0 {
            return Err(Error::UnalignedBufferOffset(
                bb.offset,
                align_limit_name,
                align,
            ));
        }

        let buffer = used
            .buffers
            .add_single(storage, bb.buffer_id, internal_use)
            .ok_or(Error::InvalidBuffer(bb.buffer_id))?;
        check_buffer_usage(buffer.usage, pub_usage)?;
        let raw_buffer = buffer
            .raw
            .as_ref()
            .ok_or(Error::InvalidBuffer(bb.buffer_id))?;

        let (bind_size, bind_end) = match bb.size {
            Some(size) => {
                // Offset and size come straight from the descriptor, so their
                // sum may overflow `u64`. Overflow is treated the same as a
                // range running past the end of the buffer, instead of
                // wrapping (release) or panicking (debug).
                let end = match bb.offset.checked_add(size.get()) {
                    Some(end) if end <= buffer.size => end,
                    overflowed_or_past_end => {
                        return Err(Error::BindingRangeTooLarge {
                            buffer: bb.buffer_id,
                            range: bb.offset..overflowed_or_past_end.unwrap_or(u64::MAX),
                            size: buffer.size,
                        });
                    }
                };
                (size.get(), end)
            }
            None => {
                // "Rest of the buffer": reject an offset beyond the end
                // rather than underflowing the subtraction below.
                if bb.offset > buffer.size {
                    return Err(Error::BindingRangeTooLarge {
                        buffer: bb.buffer_id,
                        range: bb.offset..bb.offset,
                        size: buffer.size,
                    });
                }
                (buffer.size - bb.offset, buffer.size)
            }
        };

        if bind_size > range_limit as u64 {
            return Err(Error::BufferRangeTooLarge {
                binding,
                // The error field is 32-bit; saturate instead of truncating so
                // the reported size is never smaller than the limit.
                given: bind_size.min(u32::MAX as u64) as u32,
                limit: range_limit,
            });
        }

        // Record binding info for validating dynamic offsets
        if dynamic {
            dynamic_binding_info.push(binding_model::BindGroupDynamicBindingData {
                binding_idx: binding,
                buffer_size: buffer.size,
                binding_range: bb.offset..bind_end,
                // `bind_end <= buffer.size` is guaranteed by the checks above.
                maximum_dynamic_offset: buffer.size - bind_end,
                binding_type: binding_ty,
            });
        }

        if let Some(non_zero) = min_size {
            let min_size = non_zero.get();
            if min_size > bind_size {
                return Err(Error::BindingSizeTooSmall {
                    buffer: bb.buffer_id,
                    actual: bind_size,
                    min: min_size,
                });
            }
        } else {
            // No declared minimum: remember the bound size keyed by binding
            // index. A zero-sized binding is rejected here.
            let late_size =
                wgt::BufferSize::new(bind_size).ok_or(Error::BindingZeroSize(bb.buffer_id))?;
            late_buffer_binding_sizes.insert(binding, late_size);
        }

        assert_eq!(bb.offset % wgt::COPY_BUFFER_ALIGNMENT, 0);
        used_buffer_ranges.extend(buffer.initialization_status.create_action(
            bb.buffer_id,
            // Same range as `offset..offset + bind_size`, already known not
            // to overflow.
            bb.offset..bind_end,
            MemoryInitKind::NeedsInitializedMemory,
        ));

        Ok(hal::BufferBinding {
            buffer: raw_buffer,
            offset: bb.offset,
            size: bb.size,
        })
    }

    fn create_texture_binding(
        view: &resource::TextureView<A>,
        texture_guard:
&Storage<resource::Texture<A>, id::TextureId>, + internal_use: hal::TextureUses, + pub_usage: wgt::TextureUsages, + used: &mut BindGroupStates<A>, + used_texture_ranges: &mut Vec<TextureInitTrackerAction>, + ) -> Result<(), binding_model::CreateBindGroupError> { + // Careful here: the texture may no longer have its own ref count, + // if it was deleted by the user. + let texture = used + .textures + .add_single( + texture_guard, + view.parent_id.value.0, + view.parent_id.ref_count.clone(), + Some(view.selector.clone()), + internal_use, + ) + .ok_or(binding_model::CreateBindGroupError::InvalidTexture( + view.parent_id.value.0, + ))?; + check_texture_usage(texture.desc.usage, pub_usage)?; + + used_texture_ranges.push(TextureInitTrackerAction { + id: view.parent_id.value.0, + range: TextureInitRange { + mip_range: view.desc.range.mip_range(&texture.desc), + layer_range: view.desc.range.layer_range(&texture.desc), + }, + kind: MemoryInitKind::NeedsInitializedMemory, + }); + + Ok(()) + } + + fn create_bind_group<G: GlobalIdentityHandlerFactory>( + &self, + self_id: id::DeviceId, + layout: &binding_model::BindGroupLayout<A>, + desc: &binding_model::BindGroupDescriptor, + hub: &Hub<A, G>, + token: &mut Token<binding_model::BindGroupLayout<A>>, + ) -> Result<binding_model::BindGroup<A>, binding_model::CreateBindGroupError> { + use crate::binding_model::{BindingResource as Br, CreateBindGroupError as Error}; + { + // Check that the number of entries in the descriptor matches + // the number of entries in the layout. 
+ let actual = desc.entries.len(); + let expected = layout.entries.len(); + if actual != expected { + return Err(Error::BindingsNumMismatch { expected, actual }); + } + } + + // TODO: arrayvec/smallvec, or re-use allocations + // Record binding info for dynamic offset validation + let mut dynamic_binding_info = Vec::new(); + // Map of binding -> shader reflected size + //Note: we can't collect into a vector right away because + // it needs to be in BGL iteration order, not BG entry order. + let mut late_buffer_binding_sizes = FastHashMap::default(); + // fill out the descriptors + let mut used = BindGroupStates::new(); + + let (buffer_guard, mut token) = hub.buffers.read(token); + let (texture_guard, mut token) = hub.textures.read(&mut token); //skip token + let (texture_view_guard, mut token) = hub.texture_views.read(&mut token); + let (sampler_guard, _) = hub.samplers.read(&mut token); + + let mut used_buffer_ranges = Vec::new(); + let mut used_texture_ranges = Vec::new(); + let mut hal_entries = Vec::with_capacity(desc.entries.len()); + let mut hal_buffers = Vec::new(); + let mut hal_samplers = Vec::new(); + let mut hal_textures = Vec::new(); + for entry in desc.entries.iter() { + let binding = entry.binding; + // Find the corresponding declaration in the layout + let decl = layout + .entries + .get(&binding) + .ok_or(Error::MissingBindingDeclaration(binding))?; + let (res_index, count) = match entry.resource { + Br::Buffer(ref bb) => { + let bb = Self::create_buffer_binding( + bb, + binding, + decl, + &mut used_buffer_ranges, + &mut dynamic_binding_info, + &mut late_buffer_binding_sizes, + &mut used, + &*buffer_guard, + &self.limits, + )?; + + let res_index = hal_buffers.len(); + hal_buffers.push(bb); + (res_index, 1) + } + Br::BufferArray(ref bindings_array) => { + let num_bindings = bindings_array.len(); + Self::check_array_binding(self.features, decl.count, num_bindings)?; + + let res_index = hal_buffers.len(); + for bb in bindings_array.iter() { + let bb = 
Self::create_buffer_binding( + bb, + binding, + decl, + &mut used_buffer_ranges, + &mut dynamic_binding_info, + &mut late_buffer_binding_sizes, + &mut used, + &*buffer_guard, + &self.limits, + )?; + hal_buffers.push(bb); + } + (res_index, num_bindings) + } + Br::Sampler(id) => { + match decl.ty { + wgt::BindingType::Sampler(ty) => { + let sampler = used + .samplers + .add_single(&*sampler_guard, id) + .ok_or(Error::InvalidSampler(id))?; + + // Allowed sampler values for filtering and comparison + let (allowed_filtering, allowed_comparison) = match ty { + wgt::SamplerBindingType::Filtering => (None, false), + wgt::SamplerBindingType::NonFiltering => (Some(false), false), + wgt::SamplerBindingType::Comparison => (None, true), + }; + + if let Some(allowed_filtering) = allowed_filtering { + if allowed_filtering != sampler.filtering { + return Err(Error::WrongSamplerFiltering { + binding, + layout_flt: allowed_filtering, + sampler_flt: sampler.filtering, + }); + } + } + + if allowed_comparison != sampler.comparison { + return Err(Error::WrongSamplerComparison { + binding, + layout_cmp: allowed_comparison, + sampler_cmp: sampler.comparison, + }); + } + + let res_index = hal_samplers.len(); + hal_samplers.push(&sampler.raw); + (res_index, 1) + } + _ => { + return Err(Error::WrongBindingType { + binding, + actual: decl.ty, + expected: "Sampler", + }) + } + } + } + Br::SamplerArray(ref bindings_array) => { + let num_bindings = bindings_array.len(); + Self::check_array_binding(self.features, decl.count, num_bindings)?; + + let res_index = hal_samplers.len(); + for &id in bindings_array.iter() { + let sampler = used + .samplers + .add_single(&*sampler_guard, id) + .ok_or(Error::InvalidSampler(id))?; + hal_samplers.push(&sampler.raw); + } + + (res_index, num_bindings) + } + Br::TextureView(id) => { + let view = used + .views + .add_single(&*texture_view_guard, id) + .ok_or(Error::InvalidTextureView(id))?; + let (pub_usage, internal_use) = Self::texture_use_parameters( + 
binding, + decl, + view, + "SampledTexture, ReadonlyStorageTexture or WriteonlyStorageTexture", + )?; + Self::create_texture_binding( + view, + &texture_guard, + internal_use, + pub_usage, + &mut used, + &mut used_texture_ranges, + )?; + let res_index = hal_textures.len(); + hal_textures.push(hal::TextureBinding { + view: &view.raw, + usage: internal_use, + }); + (res_index, 1) + } + Br::TextureViewArray(ref bindings_array) => { + let num_bindings = bindings_array.len(); + Self::check_array_binding(self.features, decl.count, num_bindings)?; + + let res_index = hal_textures.len(); + for &id in bindings_array.iter() { + let view = used + .views + .add_single(&*texture_view_guard, id) + .ok_or(Error::InvalidTextureView(id))?; + let (pub_usage, internal_use) = + Self::texture_use_parameters(binding, decl, view, + "SampledTextureArray, ReadonlyStorageTextureArray or WriteonlyStorageTextureArray")?; + Self::create_texture_binding( + view, + &texture_guard, + internal_use, + pub_usage, + &mut used, + &mut used_texture_ranges, + )?; + hal_textures.push(hal::TextureBinding { + view: &view.raw, + usage: internal_use, + }); + } + + (res_index, num_bindings) + } + }; + + hal_entries.push(hal::BindGroupEntry { + binding, + resource_index: res_index as u32, + count: count as u32, + }); + } + + used.optimize(); + + hal_entries.sort_by_key(|entry| entry.binding); + for (a, b) in hal_entries.iter().zip(hal_entries.iter().skip(1)) { + if a.binding == b.binding { + return Err(Error::DuplicateBinding(a.binding)); + } + } + + let hal_desc = hal::BindGroupDescriptor { + label: desc.label.borrow_option(), + layout: &layout.raw, + entries: &hal_entries, + buffers: &hal_buffers, + samplers: &hal_samplers, + textures: &hal_textures, + }; + let raw = unsafe { + self.raw + .create_bind_group(&hal_desc) + .map_err(DeviceError::from)? 
+ }; + + // manually add a dependency on BGL + layout.multi_ref_count.inc(); + + Ok(binding_model::BindGroup { + raw, + device_id: Stored { + value: id::Valid(self_id), + ref_count: self.life_guard.add_ref(), + }, + layout_id: id::Valid(desc.layout), + life_guard: LifeGuard::new(desc.label.borrow_or_default()), + used, + used_buffer_ranges, + used_texture_ranges, + dynamic_binding_info, + // collect in the order of BGL iteration + late_buffer_binding_sizes: layout + .entries + .keys() + .flat_map(|binding| late_buffer_binding_sizes.get(binding).cloned()) + .collect(), + }) + } + + fn check_array_binding( + features: wgt::Features, + count: Option<NonZeroU32>, + num_bindings: usize, + ) -> Result<(), super::binding_model::CreateBindGroupError> { + use super::binding_model::CreateBindGroupError as Error; + + if let Some(count) = count { + let count = count.get() as usize; + if count < num_bindings { + return Err(Error::BindingArrayPartialLengthMismatch { + actual: num_bindings, + expected: count, + }); + } + if count != num_bindings + && !features.contains(wgt::Features::PARTIALLY_BOUND_BINDING_ARRAY) + { + return Err(Error::BindingArrayLengthMismatch { + actual: num_bindings, + expected: count, + }); + } + if num_bindings == 0 { + return Err(Error::BindingArrayZeroLength); + } + } else { + return Err(Error::SingleBindingExpected); + }; + + Ok(()) + } + + fn texture_use_parameters( + binding: u32, + decl: &wgt::BindGroupLayoutEntry, + view: &crate::resource::TextureView<A>, + expected: &'static str, + ) -> Result<(wgt::TextureUsages, hal::TextureUses), binding_model::CreateBindGroupError> { + use crate::binding_model::CreateBindGroupError as Error; + if view + .desc + .aspects() + .contains(hal::FormatAspects::DEPTH | hal::FormatAspects::STENCIL) + { + return Err(Error::DepthStencilAspect); + } + let format_info = view.desc.format.describe(); + match decl.ty { + wgt::BindingType::Texture { + sample_type, + view_dimension, + multisampled, + } => { + use 
wgt::TextureSampleType as Tst; + if multisampled != (view.samples != 1) { + return Err(Error::InvalidTextureMultisample { + binding, + layout_multisampled: multisampled, + view_samples: view.samples, + }); + } + match (sample_type, format_info.sample_type) { + (Tst::Uint, Tst::Uint) | + (Tst::Sint, Tst::Sint) | + (Tst::Depth, Tst::Depth) | + // if we expect non-filterable, accept anything float + (Tst::Float { filterable: false }, Tst::Float { .. }) | + // if we expect filterable, require it + (Tst::Float { filterable: true }, Tst::Float { filterable: true }) | + // if we expect float, also accept depth + (Tst::Float { .. }, Tst::Depth, ..) => {} + // if we expect filterable, also accept Float that is defined as + // unfilterable if filterable feature is explicitly enabled (only hit + // if wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES is + // enabled) + (Tst::Float { filterable: true }, Tst::Float { .. }) if view.format_features.flags.contains(wgt::TextureFormatFeatureFlags::FILTERABLE) => {} + _ => { + return Err(Error::InvalidTextureSampleType { + binding, + layout_sample_type: sample_type, + view_format: view.desc.format, + }) + } + } + if view_dimension != view.desc.dimension { + return Err(Error::InvalidTextureDimension { + binding, + layout_dimension: view_dimension, + view_dimension: view.desc.dimension, + }); + } + Ok(( + wgt::TextureUsages::TEXTURE_BINDING, + hal::TextureUses::RESOURCE, + )) + } + wgt::BindingType::StorageTexture { + access, + format, + view_dimension, + } => { + if format != view.desc.format { + return Err(Error::InvalidStorageTextureFormat { + binding, + layout_format: format, + view_format: view.desc.format, + }); + } + if view_dimension != view.desc.dimension { + return Err(Error::InvalidTextureDimension { + binding, + layout_dimension: view_dimension, + view_dimension: view.desc.dimension, + }); + } + + let mip_level_count = view.selector.mips.end - view.selector.mips.start; + if mip_level_count != 1 { + return 
Err(Error::InvalidStorageTextureMipLevelCount { + binding, + mip_level_count, + }); + } + + let internal_use = match access { + wgt::StorageTextureAccess::WriteOnly => hal::TextureUses::STORAGE_READ_WRITE, + wgt::StorageTextureAccess::ReadOnly => { + if !view + .format_features + .flags + .contains(wgt::TextureFormatFeatureFlags::STORAGE_READ_WRITE) + { + return Err(Error::StorageReadNotSupported(view.desc.format)); + } + hal::TextureUses::STORAGE_READ + } + wgt::StorageTextureAccess::ReadWrite => { + if !view + .format_features + .flags + .contains(wgt::TextureFormatFeatureFlags::STORAGE_READ_WRITE) + { + return Err(Error::StorageReadNotSupported(view.desc.format)); + } + + hal::TextureUses::STORAGE_READ_WRITE + } + }; + Ok((wgt::TextureUsages::STORAGE_BINDING, internal_use)) + } + _ => Err(Error::WrongBindingType { + binding, + actual: decl.ty, + expected, + }), + } + } + + fn create_pipeline_layout( + &self, + self_id: id::DeviceId, + desc: &binding_model::PipelineLayoutDescriptor, + bgl_guard: &Storage<binding_model::BindGroupLayout<A>, id::BindGroupLayoutId>, + ) -> Result<binding_model::PipelineLayout<A>, binding_model::CreatePipelineLayoutError> { + use crate::binding_model::CreatePipelineLayoutError as Error; + + let bind_group_layouts_count = desc.bind_group_layouts.len(); + let device_max_bind_groups = self.limits.max_bind_groups as usize; + if bind_group_layouts_count > device_max_bind_groups { + return Err(Error::TooManyGroups { + actual: bind_group_layouts_count, + max: device_max_bind_groups, + }); + } + + if !desc.push_constant_ranges.is_empty() { + self.require_features(wgt::Features::PUSH_CONSTANTS)?; + } + + let mut used_stages = wgt::ShaderStages::empty(); + for (index, pc) in desc.push_constant_ranges.iter().enumerate() { + if pc.stages.intersects(used_stages) { + return Err(Error::MoreThanOnePushConstantRangePerStage { + index, + provided: pc.stages, + intersected: pc.stages & used_stages, + }); + } + used_stages |= pc.stages; + + let 
device_max_pc_size = self.limits.max_push_constant_size; + if device_max_pc_size < pc.range.end { + return Err(Error::PushConstantRangeTooLarge { + index, + range: pc.range.clone(), + max: device_max_pc_size, + }); + } + + if pc.range.start % wgt::PUSH_CONSTANT_ALIGNMENT != 0 { + return Err(Error::MisalignedPushConstantRange { + index, + bound: pc.range.start, + }); + } + if pc.range.end % wgt::PUSH_CONSTANT_ALIGNMENT != 0 { + return Err(Error::MisalignedPushConstantRange { + index, + bound: pc.range.end, + }); + } + } + + let mut count_validator = binding_model::BindingTypeMaxCountValidator::default(); + + // validate total resource counts + for &id in desc.bind_group_layouts.iter() { + let bind_group_layout = bgl_guard + .get(id) + .map_err(|_| Error::InvalidBindGroupLayout(id))?; + count_validator.merge(&bind_group_layout.count_validator); + } + count_validator + .validate(&self.limits) + .map_err(Error::TooManyBindings)?; + + let bgl_vec = desc + .bind_group_layouts + .iter() + .map(|&id| &bgl_guard.get(id).unwrap().raw) + .collect::<Vec<_>>(); + let hal_desc = hal::PipelineLayoutDescriptor { + label: desc.label.borrow_option(), + flags: hal::PipelineLayoutFlags::BASE_VERTEX_INSTANCE, + bind_group_layouts: &bgl_vec, + push_constant_ranges: desc.push_constant_ranges.as_ref(), + }; + + let raw = unsafe { + self.raw + .create_pipeline_layout(&hal_desc) + .map_err(DeviceError::from)? + }; + + Ok(binding_model::PipelineLayout { + raw, + device_id: Stored { + value: id::Valid(self_id), + ref_count: self.life_guard.add_ref(), + }, + life_guard: LifeGuard::new(desc.label.borrow_or_default()), + bind_group_layout_ids: desc + .bind_group_layouts + .iter() + .map(|&id| { + // manually add a dependency to BGL + bgl_guard.get(id).unwrap().multi_ref_count.inc(); + id::Valid(id) + }) + .collect(), + push_constant_ranges: desc.push_constant_ranges.iter().cloned().collect(), + }) + } + + //TODO: refactor this. 
It's the only method of `Device` that registers new objects + // (the pipeline layout). + fn derive_pipeline_layout( + &self, + self_id: id::DeviceId, + implicit_context: Option<ImplicitPipelineContext>, + mut derived_group_layouts: ArrayVec<binding_model::BindEntryMap, { hal::MAX_BIND_GROUPS }>, + bgl_guard: &mut Storage<binding_model::BindGroupLayout<A>, id::BindGroupLayoutId>, + pipeline_layout_guard: &mut Storage<binding_model::PipelineLayout<A>, id::PipelineLayoutId>, + ) -> Result<id::PipelineLayoutId, pipeline::ImplicitLayoutError> { + while derived_group_layouts + .last() + .map_or(false, |map| map.is_empty()) + { + derived_group_layouts.pop(); + } + let mut ids = implicit_context.ok_or(pipeline::ImplicitLayoutError::MissingIds(0))?; + let group_count = derived_group_layouts.len(); + if ids.group_ids.len() < group_count { + log::error!( + "Not enough bind group IDs ({}) specified for the implicit layout ({})", + ids.group_ids.len(), + derived_group_layouts.len() + ); + return Err(pipeline::ImplicitLayoutError::MissingIds(group_count as _)); + } + + for (bgl_id, map) in ids.group_ids.iter_mut().zip(derived_group_layouts) { + match Device::deduplicate_bind_group_layout(self_id, &map, bgl_guard) { + Some(dedup_id) => { + *bgl_id = dedup_id; + } + None => { + let bgl = self.create_bind_group_layout(self_id, None, map)?; + bgl_guard.force_replace(*bgl_id, bgl); + } + }; + } + + let layout_desc = binding_model::PipelineLayoutDescriptor { + label: None, + bind_group_layouts: Cow::Borrowed(&ids.group_ids[..group_count]), + push_constant_ranges: Cow::Borrowed(&[]), //TODO? 
+ }; + let layout = self.create_pipeline_layout(self_id, &layout_desc, bgl_guard)?; + pipeline_layout_guard.force_replace(ids.root_id, layout); + Ok(ids.root_id) + } + + fn create_compute_pipeline<G: GlobalIdentityHandlerFactory>( + &self, + self_id: id::DeviceId, + desc: &pipeline::ComputePipelineDescriptor, + implicit_context: Option<ImplicitPipelineContext>, + hub: &Hub<A, G>, + token: &mut Token<Self>, + ) -> Result<pipeline::ComputePipeline<A>, pipeline::CreateComputePipelineError> { + //TODO: only lock mutable if the layout is derived + let (mut pipeline_layout_guard, mut token) = hub.pipeline_layouts.write(token); + let (mut bgl_guard, mut token) = hub.bind_group_layouts.write(&mut token); + + // This has to be done first, or otherwise the IDs may be pointing to entries + // that are not even in the storage. + if let Some(ref ids) = implicit_context { + pipeline_layout_guard.insert_error(ids.root_id, IMPLICIT_FAILURE); + for &bgl_id in ids.group_ids.iter() { + bgl_guard.insert_error(bgl_id, IMPLICIT_FAILURE); + } + } + + self.require_downlevel_flags(wgt::DownlevelFlags::COMPUTE_SHADERS)?; + + let mut derived_group_layouts = + ArrayVec::<binding_model::BindEntryMap, { hal::MAX_BIND_GROUPS }>::new(); + let mut shader_binding_sizes = FastHashMap::default(); + + let io = validation::StageIo::default(); + let (shader_module_guard, _) = hub.shader_modules.read(&mut token); + + let shader_module = shader_module_guard + .get(desc.stage.module) + .map_err(|_| validation::StageError::InvalidModule)?; + + { + let flag = wgt::ShaderStages::COMPUTE; + let provided_layouts = match desc.layout { + Some(pipeline_layout_id) => Some(Device::get_introspection_bind_group_layouts( + pipeline_layout_guard + .get(pipeline_layout_id) + .map_err(|_| pipeline::CreateComputePipelineError::InvalidLayout)?, + &*bgl_guard, + )), + None => { + for _ in 0..self.limits.max_bind_groups { + derived_group_layouts.push(binding_model::BindEntryMap::default()); + } + None + } + }; + if let 
Some(ref interface) = shader_module.interface { + let _ = interface.check_stage( + provided_layouts.as_ref().map(|p| p.as_slice()), + &mut derived_group_layouts, + &mut shader_binding_sizes, + &desc.stage.entry_point, + flag, + io, + None, + )?; + } + } + + let pipeline_layout_id = match desc.layout { + Some(id) => id, + None => self.derive_pipeline_layout( + self_id, + implicit_context, + derived_group_layouts, + &mut *bgl_guard, + &mut *pipeline_layout_guard, + )?, + }; + let layout = pipeline_layout_guard + .get(pipeline_layout_id) + .map_err(|_| pipeline::CreateComputePipelineError::InvalidLayout)?; + + let late_sized_buffer_groups = + Device::make_late_sized_buffer_groups(&shader_binding_sizes, layout, &*bgl_guard); + + let pipeline_desc = hal::ComputePipelineDescriptor { + label: desc.label.borrow_option(), + layout: &layout.raw, + stage: hal::ProgrammableStage { + entry_point: desc.stage.entry_point.as_ref(), + module: &shader_module.raw, + }, + }; + + let raw = + unsafe { self.raw.create_compute_pipeline(&pipeline_desc) }.map_err( + |err| match err { + hal::PipelineError::Device(error) => { + pipeline::CreateComputePipelineError::Device(error.into()) + } + hal::PipelineError::Linkage(_stages, msg) => { + pipeline::CreateComputePipelineError::Internal(msg) + } + hal::PipelineError::EntryPoint(_stage) => { + pipeline::CreateComputePipelineError::Internal(EP_FAILURE.to_string()) + } + }, + )?; + + let pipeline = pipeline::ComputePipeline { + raw, + layout_id: Stored { + value: id::Valid(pipeline_layout_id), + ref_count: layout.life_guard.add_ref(), + }, + device_id: Stored { + value: id::Valid(self_id), + ref_count: self.life_guard.add_ref(), + }, + late_sized_buffer_groups, + life_guard: LifeGuard::new(desc.label.borrow_or_default()), + }; + Ok(pipeline) + } + + fn create_render_pipeline<G: GlobalIdentityHandlerFactory>( + &self, + self_id: id::DeviceId, + adapter: &Adapter<A>, + desc: &pipeline::RenderPipelineDescriptor, + implicit_context: 
Option<ImplicitPipelineContext>, + hub: &Hub<A, G>, + token: &mut Token<Self>, + ) -> Result<pipeline::RenderPipeline<A>, pipeline::CreateRenderPipelineError> { + use wgt::TextureFormatFeatureFlags as Tfff; + + //TODO: only lock mutable if the layout is derived + let (mut pipeline_layout_guard, mut token) = hub.pipeline_layouts.write(token); + let (mut bgl_guard, mut token) = hub.bind_group_layouts.write(&mut token); + + // This has to be done first, or otherwise the IDs may be pointing to entries + // that are not even in the storage. + if let Some(ref ids) = implicit_context { + pipeline_layout_guard.insert_error(ids.root_id, IMPLICIT_FAILURE); + for &bgl_id in ids.group_ids.iter() { + bgl_guard.insert_error(bgl_id, IMPLICIT_FAILURE); + } + } + + let mut derived_group_layouts = + ArrayVec::<binding_model::BindEntryMap, { hal::MAX_BIND_GROUPS }>::new(); + let mut shader_binding_sizes = FastHashMap::default(); + + let num_attachments = desc.fragment.as_ref().map(|f| f.targets.len()).unwrap_or(0); + if num_attachments > hal::MAX_COLOR_ATTACHMENTS { + return Err( + pipeline::CreateRenderPipelineError::TooManyColorAttachments { + given: num_attachments as u32, + limit: hal::MAX_COLOR_ATTACHMENTS as u32, + }, + ); + } + + let color_targets = desc + .fragment + .as_ref() + .map_or(&[][..], |fragment| &fragment.targets); + let depth_stencil_state = desc.depth_stencil.as_ref(); + + let cts: ArrayVec<_, { hal::MAX_COLOR_ATTACHMENTS }> = + color_targets.iter().filter_map(|x| x.as_ref()).collect(); + if !cts.is_empty() && { + let first = &cts[0]; + cts[1..] 
+ .iter() + .any(|ct| ct.write_mask != first.write_mask || ct.blend != first.blend) + } { + log::info!("Color targets: {:?}", color_targets); + self.require_downlevel_flags(wgt::DownlevelFlags::INDEPENDENT_BLEND)?; + } + + let mut io = validation::StageIo::default(); + let mut validated_stages = wgt::ShaderStages::empty(); + + let mut vertex_steps = Vec::with_capacity(desc.vertex.buffers.len()); + let mut vertex_buffers = Vec::with_capacity(desc.vertex.buffers.len()); + let mut total_attributes = 0; + for (i, vb_state) in desc.vertex.buffers.iter().enumerate() { + vertex_steps.push(pipeline::VertexStep { + stride: vb_state.array_stride, + mode: vb_state.step_mode, + }); + if vb_state.attributes.is_empty() { + continue; + } + if vb_state.array_stride > self.limits.max_vertex_buffer_array_stride as u64 { + return Err(pipeline::CreateRenderPipelineError::VertexStrideTooLarge { + index: i as u32, + given: vb_state.array_stride as u32, + limit: self.limits.max_vertex_buffer_array_stride, + }); + } + if vb_state.array_stride % wgt::VERTEX_STRIDE_ALIGNMENT != 0 { + return Err(pipeline::CreateRenderPipelineError::UnalignedVertexStride { + index: i as u32, + stride: vb_state.array_stride, + }); + } + vertex_buffers.push(hal::VertexBufferLayout { + array_stride: vb_state.array_stride, + step_mode: vb_state.step_mode, + attributes: vb_state.attributes.as_ref(), + }); + + for attribute in vb_state.attributes.iter() { + if attribute.offset >= 0x10000000 { + return Err( + pipeline::CreateRenderPipelineError::InvalidVertexAttributeOffset { + location: attribute.shader_location, + offset: attribute.offset, + }, + ); + } + + if let wgt::VertexFormat::Float64 + | wgt::VertexFormat::Float64x2 + | wgt::VertexFormat::Float64x3 + | wgt::VertexFormat::Float64x4 = attribute.format + { + self.require_features(wgt::Features::VERTEX_ATTRIBUTE_64BIT)?; + } + + io.insert( + attribute.shader_location, + validation::InterfaceVar::vertex_attribute(attribute.format), + ); + } + total_attributes += 
vb_state.attributes.len(); + } + + if vertex_buffers.len() > self.limits.max_vertex_buffers as usize { + return Err(pipeline::CreateRenderPipelineError::TooManyVertexBuffers { + given: vertex_buffers.len() as u32, + limit: self.limits.max_vertex_buffers, + }); + } + if total_attributes > self.limits.max_vertex_attributes as usize { + return Err( + pipeline::CreateRenderPipelineError::TooManyVertexAttributes { + given: total_attributes as u32, + limit: self.limits.max_vertex_attributes, + }, + ); + } + + if desc.primitive.strip_index_format.is_some() && !desc.primitive.topology.is_strip() { + return Err( + pipeline::CreateRenderPipelineError::StripIndexFormatForNonStripTopology { + strip_index_format: desc.primitive.strip_index_format, + topology: desc.primitive.topology, + }, + ); + } + + if desc.primitive.unclipped_depth { + self.require_features(wgt::Features::DEPTH_CLIP_CONTROL)?; + } + + if desc.primitive.polygon_mode == wgt::PolygonMode::Line { + self.require_features(wgt::Features::POLYGON_MODE_LINE)?; + } + if desc.primitive.polygon_mode == wgt::PolygonMode::Point { + self.require_features(wgt::Features::POLYGON_MODE_POINT)?; + } + + if desc.primitive.conservative { + self.require_features(wgt::Features::CONSERVATIVE_RASTERIZATION)?; + } + + if desc.primitive.conservative && desc.primitive.polygon_mode != wgt::PolygonMode::Fill { + return Err( + pipeline::CreateRenderPipelineError::ConservativeRasterizationNonFillPolygonMode, + ); + } + + for (i, cs) in color_targets.iter().enumerate() { + if let Some(cs) = cs.as_ref() { + let error = loop { + if cs.write_mask.contains_invalid_bits() { + break Some(pipeline::ColorStateError::InvalidWriteMask(cs.write_mask)); + } + + let format_features = self.describe_format_features(adapter, cs.format)?; + if !format_features + .allowed_usages + .contains(wgt::TextureUsages::RENDER_ATTACHMENT) + { + break Some(pipeline::ColorStateError::FormatNotRenderable(cs.format)); + } + let blendable = 
format_features.flags.contains(Tfff::BLENDABLE); + let filterable = format_features.flags.contains(Tfff::FILTERABLE); + let adapter_specific = self + .features + .contains(wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES); + // according to WebGPU specifications the texture needs to be + // [`TextureFormatFeatureFlags::FILTERABLE`] if blending is set - use + // [`Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES`] to elude + // this limitation + if cs.blend.is_some() && (!blendable || (!filterable && !adapter_specific)) { + break Some(pipeline::ColorStateError::FormatNotBlendable(cs.format)); + } + if !hal::FormatAspects::from(cs.format).contains(hal::FormatAspects::COLOR) { + break Some(pipeline::ColorStateError::FormatNotColor(cs.format)); + } + if desc.multisample.count > 1 + && !format_features + .flags + .sample_count_supported(desc.multisample.count) + { + break Some(pipeline::ColorStateError::FormatNotMultisampled(cs.format)); + } + + break None; + }; + if let Some(e) = error { + return Err(pipeline::CreateRenderPipelineError::ColorState(i as u8, e)); + } + } + } + + if let Some(ds) = depth_stencil_state { + let error = loop { + let format_features = self.describe_format_features(adapter, ds.format)?; + if !format_features + .allowed_usages + .contains(wgt::TextureUsages::RENDER_ATTACHMENT) + { + break Some(pipeline::DepthStencilStateError::FormatNotRenderable( + ds.format, + )); + } + + let aspect = hal::FormatAspects::from(ds.format); + if ds.is_depth_enabled() && !aspect.contains(hal::FormatAspects::DEPTH) { + break Some(pipeline::DepthStencilStateError::FormatNotDepth(ds.format)); + } + if ds.stencil.is_enabled() && !aspect.contains(hal::FormatAspects::STENCIL) { + break Some(pipeline::DepthStencilStateError::FormatNotStencil( + ds.format, + )); + } + if desc.multisample.count > 1 + && !format_features + .flags + .sample_count_supported(desc.multisample.count) + { + break Some(pipeline::DepthStencilStateError::FormatNotMultisampled( + ds.format, 
+ )); + } + + break None; + }; + if let Some(e) = error { + return Err(pipeline::CreateRenderPipelineError::DepthStencilState(e)); + } + + if ds.bias.clamp != 0.0 { + self.require_downlevel_flags(wgt::DownlevelFlags::DEPTH_BIAS_CLAMP)?; + } + } + + if desc.layout.is_none() { + for _ in 0..self.limits.max_bind_groups { + derived_group_layouts.push(binding_model::BindEntryMap::default()); + } + } + + let samples = { + let sc = desc.multisample.count; + if sc == 0 || sc > 32 || !conv::is_power_of_two_u32(sc) { + return Err(pipeline::CreateRenderPipelineError::InvalidSampleCount(sc)); + } + sc + }; + + let (shader_module_guard, _) = hub.shader_modules.read(&mut token); + + let vertex_stage = { + let stage = &desc.vertex.stage; + let flag = wgt::ShaderStages::VERTEX; + + let shader_module = shader_module_guard.get(stage.module).map_err(|_| { + pipeline::CreateRenderPipelineError::Stage { + stage: flag, + error: validation::StageError::InvalidModule, + } + })?; + + let provided_layouts = match desc.layout { + Some(pipeline_layout_id) => { + let pipeline_layout = pipeline_layout_guard + .get(pipeline_layout_id) + .map_err(|_| pipeline::CreateRenderPipelineError::InvalidLayout)?; + Some(Device::get_introspection_bind_group_layouts( + pipeline_layout, + &*bgl_guard, + )) + } + None => None, + }; + + if let Some(ref interface) = shader_module.interface { + io = interface + .check_stage( + provided_layouts.as_ref().map(|p| p.as_slice()), + &mut derived_group_layouts, + &mut shader_binding_sizes, + &stage.entry_point, + flag, + io, + desc.depth_stencil.as_ref().map(|d| d.depth_compare), + ) + .map_err(|error| pipeline::CreateRenderPipelineError::Stage { + stage: flag, + error, + })?; + validated_stages |= flag; + } + + hal::ProgrammableStage { + module: &shader_module.raw, + entry_point: stage.entry_point.as_ref(), + } + }; + + let fragment_stage = match desc.fragment { + Some(ref fragment) => { + let flag = wgt::ShaderStages::FRAGMENT; + + let shader_module = + 
shader_module_guard + .get(fragment.stage.module) + .map_err(|_| pipeline::CreateRenderPipelineError::Stage { + stage: flag, + error: validation::StageError::InvalidModule, + })?; + + let provided_layouts = match desc.layout { + Some(pipeline_layout_id) => Some(Device::get_introspection_bind_group_layouts( + pipeline_layout_guard + .get(pipeline_layout_id) + .map_err(|_| pipeline::CreateRenderPipelineError::InvalidLayout)?, + &*bgl_guard, + )), + None => None, + }; + + if validated_stages == wgt::ShaderStages::VERTEX { + if let Some(ref interface) = shader_module.interface { + io = interface + .check_stage( + provided_layouts.as_ref().map(|p| p.as_slice()), + &mut derived_group_layouts, + &mut shader_binding_sizes, + &fragment.stage.entry_point, + flag, + io, + desc.depth_stencil.as_ref().map(|d| d.depth_compare), + ) + .map_err(|error| pipeline::CreateRenderPipelineError::Stage { + stage: flag, + error, + })?; + validated_stages |= flag; + } + } + + Some(hal::ProgrammableStage { + module: &shader_module.raw, + entry_point: fragment.stage.entry_point.as_ref(), + }) + } + None => None, + }; + + if validated_stages.contains(wgt::ShaderStages::FRAGMENT) { + for (i, output) in io.iter() { + match color_targets.get(*i as usize) { + Some(&Some(ref state)) => { + validation::check_texture_format(state.format, &output.ty).map_err( + |pipeline| { + pipeline::CreateRenderPipelineError::ColorState( + *i as u8, + pipeline::ColorStateError::IncompatibleFormat { + pipeline, + shader: output.ty, + }, + ) + }, + )?; + } + _ => { + log::info!( + "The fragment stage {:?} output @location({}) values are ignored", + fragment_stage + .as_ref() + .map_or("", |stage| stage.entry_point), + i + ); + } + } + } + } + let last_stage = match desc.fragment { + Some(_) => wgt::ShaderStages::FRAGMENT, + None => wgt::ShaderStages::VERTEX, + }; + if desc.layout.is_none() && !validated_stages.contains(last_stage) { + return Err(pipeline::ImplicitLayoutError::ReflectionError(last_stage).into()); + } 
+ + let pipeline_layout_id = match desc.layout { + Some(id) => id, + None => self.derive_pipeline_layout( + self_id, + implicit_context, + derived_group_layouts, + &mut *bgl_guard, + &mut *pipeline_layout_guard, + )?, + }; + let layout = pipeline_layout_guard + .get(pipeline_layout_id) + .map_err(|_| pipeline::CreateRenderPipelineError::InvalidLayout)?; + + // Multiview is only supported if the feature is enabled + if desc.multiview.is_some() { + self.require_features(wgt::Features::MULTIVIEW)?; + } + + for size in shader_binding_sizes.values() { + if size.get() % 16 != 0 { + self.require_downlevel_flags( + wgt::DownlevelFlags::BUFFER_BINDINGS_NOT_16_BYTE_ALIGNED, + )?; + } + } + + let late_sized_buffer_groups = + Device::make_late_sized_buffer_groups(&shader_binding_sizes, layout, &*bgl_guard); + + let pipeline_desc = hal::RenderPipelineDescriptor { + label: desc.label.borrow_option(), + layout: &layout.raw, + vertex_buffers: &vertex_buffers, + vertex_stage, + primitive: desc.primitive, + depth_stencil: desc.depth_stencil.clone(), + multisample: desc.multisample, + fragment_stage, + color_targets, + multiview: desc.multiview, + }; + let raw = + unsafe { self.raw.create_render_pipeline(&pipeline_desc) }.map_err( + |err| match err { + hal::PipelineError::Device(error) => { + pipeline::CreateRenderPipelineError::Device(error.into()) + } + hal::PipelineError::Linkage(stage, msg) => { + pipeline::CreateRenderPipelineError::Internal { stage, error: msg } + } + hal::PipelineError::EntryPoint(stage) => { + pipeline::CreateRenderPipelineError::Internal { + stage: hal::auxil::map_naga_stage(stage), + error: EP_FAILURE.to_string(), + } + } + }, + )?; + + let pass_context = RenderPassContext { + attachments: AttachmentData { + colors: color_targets + .iter() + .map(|state| state.as_ref().map(|s| s.format)) + .collect(), + resolves: ArrayVec::new(), + depth_stencil: depth_stencil_state.as_ref().map(|state| state.format), + }, + sample_count: samples, + multiview: 
desc.multiview, + }; + + let mut flags = pipeline::PipelineFlags::empty(); + for state in color_targets.iter().filter_map(|s| s.as_ref()) { + if let Some(ref bs) = state.blend { + if bs.color.uses_constant() | bs.alpha.uses_constant() { + flags |= pipeline::PipelineFlags::BLEND_CONSTANT; + } + } + } + if let Some(ds) = depth_stencil_state.as_ref() { + if ds.stencil.is_enabled() && ds.stencil.needs_ref_value() { + flags |= pipeline::PipelineFlags::STENCIL_REFERENCE; + } + if !ds.is_depth_read_only() { + flags |= pipeline::PipelineFlags::WRITES_DEPTH; + } + if !ds.is_stencil_read_only() { + flags |= pipeline::PipelineFlags::WRITES_STENCIL; + } + } + + let pipeline = pipeline::RenderPipeline { + raw, + layout_id: Stored { + value: id::Valid(pipeline_layout_id), + ref_count: layout.life_guard.add_ref(), + }, + device_id: Stored { + value: id::Valid(self_id), + ref_count: self.life_guard.add_ref(), + }, + pass_context, + flags, + strip_index_format: desc.primitive.strip_index_format, + vertex_steps, + late_sized_buffer_groups, + life_guard: LifeGuard::new(desc.label.borrow_or_default()), + }; + Ok(pipeline) + } + + fn describe_format_features( + &self, + adapter: &Adapter<A>, + format: TextureFormat, + ) -> Result<wgt::TextureFormatFeatures, MissingFeatures> { + let format_desc = format.describe(); + self.require_features(format_desc.required_features)?; + + let using_device_features = self + .features + .contains(wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES); + // If we're running downlevel, we need to manually ask the backend what + // we can use as we can't trust WebGPU. 
+ let downlevel = !self.downlevel.is_webgpu_compliant(); + + if using_device_features || downlevel { + Ok(adapter.get_texture_format_features(format)) + } else { + Ok(format_desc.guaranteed_format_features) + } + } + + fn wait_for_submit( + &self, + submission_index: SubmissionIndex, + token: &mut Token<Self>, + ) -> Result<(), WaitIdleError> { + let last_done_index = unsafe { + self.raw + .get_fence_value(&self.fence) + .map_err(DeviceError::from)? + }; + if last_done_index < submission_index { + log::info!("Waiting for submission {:?}", submission_index); + unsafe { + self.raw + .wait(&self.fence, submission_index, !0) + .map_err(DeviceError::from)? + }; + let closures = self + .lock_life(token) + .triage_submissions(submission_index, &self.command_allocator); + assert!( + closures.is_empty(), + "wait_for_submit is not expected to work with closures" + ); + } + Ok(()) + } + + fn create_query_set( + &self, + self_id: id::DeviceId, + desc: &resource::QuerySetDescriptor, + ) -> Result<resource::QuerySet<A>, resource::CreateQuerySetError> { + use resource::CreateQuerySetError as Error; + + match desc.ty { + wgt::QueryType::Occlusion => {} + wgt::QueryType::Timestamp => { + self.require_features(wgt::Features::TIMESTAMP_QUERY)?; + } + wgt::QueryType::PipelineStatistics(..) 
=> { + self.require_features(wgt::Features::PIPELINE_STATISTICS_QUERY)?; + } + } + + if desc.count == 0 { + return Err(Error::ZeroCount); + } + + if desc.count > wgt::QUERY_SET_MAX_QUERIES { + return Err(Error::TooManyQueries { + count: desc.count, + maximum: wgt::QUERY_SET_MAX_QUERIES, + }); + } + + let hal_desc = desc.map_label(super::LabelHelpers::borrow_option); + Ok(resource::QuerySet { + raw: unsafe { self.raw.create_query_set(&hal_desc).unwrap() }, + device_id: Stored { + value: id::Valid(self_id), + ref_count: self.life_guard.add_ref(), + }, + life_guard: LifeGuard::new(""), + desc: desc.map_label(|_| ()), + }) + } +} + +impl<A: HalApi> Device<A> { + pub(crate) fn destroy_buffer(&self, buffer: resource::Buffer<A>) { + if let Some(raw) = buffer.raw { + unsafe { + self.raw.destroy_buffer(raw); + } + } + } + + pub(crate) fn destroy_command_buffer(&self, cmd_buf: command::CommandBuffer<A>) { + let mut baked = cmd_buf.into_baked(); + unsafe { + baked.encoder.reset_all(baked.list.into_iter()); + } + unsafe { + self.raw.destroy_command_encoder(baked.encoder); + } + } + + /// Wait for idle and remove resources that we can, before we die. 
    pub(crate) fn prepare_to_die(&mut self) {
        self.pending_writes.deactivate();
        let mut life_tracker = self.life_tracker.lock();
        let current_index = self.active_submission_index;
        // Bounded wait (`CLEANUP_WAIT_MS`); a failure is only logged and
        // cleanup proceeds regardless.
        if let Err(error) = unsafe { self.raw.wait(&self.fence, current_index, CLEANUP_WAIT_MS) } {
            log::error!("failed to wait for the device: {:?}", error);
        }
        // The value returned by triage is intentionally discarded here.
        let _ = life_tracker.triage_submissions(current_index, &self.command_allocator);
        life_tracker.cleanup(&self.raw);
        #[cfg(feature = "trace")]
        {
            self.trace = None;
        }
    }

    /// Consume the device and release what it owns: pending writes, the
    /// command allocator, the zero buffer and the fence, then exit the raw
    /// device with its queue.
    pub(crate) fn dispose(self) {
        self.pending_writes.dispose(&self.raw);
        self.command_allocator.into_inner().dispose(&self.raw);
        unsafe {
            self.raw.destroy_buffer(self.zero_buffer);
            self.raw.destroy_fence(self.fence);
            self.raw.exit(self.queue);
        }
    }
}

impl<A: HalApi> crate::hub::Resource for Device<A> {
    const TYPE: &'static str = "Device";

    /// Borrow the device's life guard.
    fn life_guard(&self) -> &LifeGuard {
        &self.life_guard
    }
}

/// Error for a device id that does not resolve to a valid device.
#[derive(Clone, Debug, Error)]
#[error("device is invalid")]
pub struct InvalidDevice;

/// Device-level failure attached to operations on a device's resources.
#[derive(Clone, Debug, Error)]
pub enum DeviceError {
    /// The parent device id is invalid.
    #[error("parent device is invalid")]
    Invalid,
    /// The parent device was lost.
    #[error("parent device is lost")]
    Lost,
    /// The device ran out of memory.
    #[error("not enough memory left")]
    OutOfMemory,
}

impl From<hal::DeviceError> for DeviceError {
    /// Map a HAL device error onto the variant of the same name.
    fn from(error: hal::DeviceError) -> Self {
        match error {
            hal::DeviceError::Lost => DeviceError::Lost,
            hal::DeviceError::OutOfMemory => DeviceError::OutOfMemory,
        }
    }
}

/// Error carrying the features that are required but not enabled.
#[derive(Clone, Debug, Error)]
#[error("Features {0:?} are required but not enabled on the device")]
pub struct MissingFeatures(pub wgt::Features);

/// Error carrying the downlevel flags that are required but not supported.
#[derive(Clone, Debug, Error)]
#[error(
    "Downlevel flags {0:?} are required but not supported on the device.\n{}",
    DOWNLEVEL_ERROR_MESSAGE
)]
pub struct MissingDownlevelFlags(pub wgt::DownlevelFlags);

#[derive(Clone, Debug)]
#[cfg_attr(feature = "trace", derive(serde::Serialize))]
#[cfg_attr(feature = "replay", derive(serde::Deserialize))]
pub
struct ImplicitPipelineContext {
    /// Id for the pipeline layout.
    pub root_id: id::PipelineLayoutId,
    /// Ids for the bind group layouts; holds at most `hal::MAX_BIND_GROUPS`.
    pub group_ids: ArrayVec<id::BindGroupLayoutId, { hal::MAX_BIND_GROUPS }>,
}

/// Input ids for a pipeline layout and its bind group layouts, converted into
/// an [`ImplicitPipelineContext`] by `prepare`.
pub struct ImplicitPipelineIds<'a, G: GlobalIdentityHandlerFactory> {
    pub root_id: Input<G, id::PipelineLayoutId>,
    pub group_ids: &'a [Input<G, id::BindGroupLayoutId>],
}

impl<G: GlobalIdentityHandlerFactory> ImplicitPipelineIds<'_, G> {
    /// Run every input id through the matching hub registry's
    /// `prepare(..).into_id()` and collect the resulting ids.
    fn prepare<A: HalApi>(self, hub: &Hub<A, G>) -> ImplicitPipelineContext {
        ImplicitPipelineContext {
            root_id: hub.pipeline_layouts.prepare(self.root_id).into_id(),
            group_ids: self
                .group_ids
                .iter()
                .map(|id_in| hub.bind_group_layouts.prepare(id_in.clone()).into_id())
                .collect(),
        }
    }
}

impl<G: GlobalIdentityHandlerFactory> Global<G> {
    /// Ask the adapter whether it supports the given surface.
    ///
    /// # Errors
    ///
    /// `InvalidAdapter` or `InvalidSurface` when the respective id fails to
    /// resolve; the adapter is looked up first.
    pub fn adapter_is_surface_supported<A: HalApi>(
        &self,
        adapter_id: id::AdapterId,
        surface_id: id::SurfaceId,
    ) -> Result<bool, instance::IsSurfaceSupportedError> {
        let hub = A::hub(self);
        let mut token = Token::root();

        // Surfaces are locked before adapters.
        let (surface_guard, mut token) = self.surfaces.read(&mut token);
        let (adapter_guard, mut _token) = hub.adapters.read(&mut token);
        let adapter = adapter_guard
            .get(adapter_id)
            .map_err(|_| instance::IsSurfaceSupportedError::InvalidAdapter)?;
        let surface = surface_guard
            .get(surface_id)
            .map_err(|_| instance::IsSurfaceSupportedError::InvalidSurface)?;
        Ok(adapter.is_surface_supported(surface))
    }

    /// Query the capabilities of `surface_id` for `adapter_id` and return
    /// their formats, present modes and composite alpha modes.
    pub fn surface_get_capabilities<A: HalApi>(
        &self,
        surface_id: id::SurfaceId,
        adapter_id: id::AdapterId,
    ) -> Result<wgt::SurfaceCapabilities, instance::GetSurfaceSupportError> {
        profiling::scope!("Surface::get_capabilities");
        self.fetch_adapter_and_surface::<A, _, _>(surface_id, adapter_id, |adapter, surface| {
            let hal_caps = surface.get_capabilities(adapter)?;

            Ok(wgt::SurfaceCapabilities {
                formats: hal_caps.formats,
                present_modes: hal_caps.present_modes,
                alpha_modes: hal_caps.composite_alpha_modes,
            })
        })
    }

    /// Resolve both ids under read locks and pass the adapter and surface to
    /// `get_supported_callback`, returning whatever the callback returns.
    ///
    /// # Errors
    ///
    /// `InvalidAdapter` or `InvalidSurface` when the respective id fails to
    /// resolve; the adapter is looked up first.
    fn fetch_adapter_and_surface<
        A: HalApi,
        F: FnOnce(&Adapter<A>, &Surface) -> Result<B, instance::GetSurfaceSupportError>,
        B,
    >(
        &self,
        surface_id: id::SurfaceId,
        adapter_id: id::AdapterId,
        get_supported_callback: F,
    ) -> Result<B, instance::GetSurfaceSupportError> {
        let hub = A::hub(self);
        let mut token = Token::root();

        // Surfaces are locked before adapters.
        let (surface_guard, mut token) = self.surfaces.read(&mut token);
        let (adapter_guard, mut _token) = hub.adapters.read(&mut token);
        let adapter = adapter_guard
            .get(adapter_id)
            .map_err(|_| instance::GetSurfaceSupportError::InvalidAdapter)?;
        let surface = surface_guard
            .get(surface_id)
            .map_err(|_| instance::GetSurfaceSupportError::InvalidSurface)?;

        get_supported_callback(adapter, surface)
    }

    /// Return the `features` value stored on the device.
    ///
    /// # Errors
    ///
    /// [`InvalidDevice`] if `device_id` does not resolve.
    pub fn device_features<A: HalApi>(
        &self,
        device_id: id::DeviceId,
    ) -> Result<wgt::Features, InvalidDevice> {
        let hub = A::hub(self);
        let mut token = Token::root();
        let (device_guard, _) = hub.devices.read(&mut token);
        let device = device_guard.get(device_id).map_err(|_| InvalidDevice)?;

        Ok(device.features)
    }

    /// Return a clone of the `limits` value stored on the device.
    ///
    /// # Errors
    ///
    /// [`InvalidDevice`] if `device_id` does not resolve.
    pub fn device_limits<A: HalApi>(
        &self,
        device_id: id::DeviceId,
    ) -> Result<wgt::Limits, InvalidDevice> {
        let hub = A::hub(self);
        let mut token = Token::root();
        let (device_guard, _) = hub.devices.read(&mut token);
        let device = device_guard.get(device_id).map_err(|_| InvalidDevice)?;

        Ok(device.limits.clone())
    }

    /// Return a clone of the `downlevel` capabilities stored on the device.
    ///
    /// # Errors
    ///
    /// [`InvalidDevice`] if `device_id` does not resolve.
    pub fn device_downlevel_properties<A: HalApi>(
        &self,
        device_id: id::DeviceId,
    ) -> Result<wgt::DownlevelCapabilities, InvalidDevice> {
        let hub = A::hub(self);
        let mut token = Token::root();
        let (device_guard, _) = hub.devices.read(&mut token);
        let device = device_guard.get(device_id).map_err(|_| InvalidDevice)?;

        Ok(device.downlevel.clone())
    }

    pub fn device_create_buffer<A: HalApi>(
        &self,
        device_id: id::DeviceId,
        desc: &resource::BufferDescriptor,
        id_in: Input<G, id::BufferId>,
    ) -> (id::BufferId, Option<resource::CreateBufferError>) {
profiling::scope!("Device::create_buffer"); + + let hub = A::hub(self); + let mut token = Token::root(); + let fid = hub.buffers.prepare(id_in); + + let (device_guard, mut token) = hub.devices.read(&mut token); + let error = loop { + let device = match device_guard.get(device_id) { + Ok(device) => device, + Err(_) => break DeviceError::Invalid.into(), + }; + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + let mut desc = desc.clone(); + let mapped_at_creation = mem::replace(&mut desc.mapped_at_creation, false); + if mapped_at_creation && !desc.usage.contains(wgt::BufferUsages::MAP_WRITE) { + desc.usage |= wgt::BufferUsages::COPY_DST; + } + trace + .lock() + .add(trace::Action::CreateBuffer(fid.id(), desc)); + } + + let mut buffer = match device.create_buffer(device_id, desc, false) { + Ok(buffer) => buffer, + Err(e) => break e, + }; + let ref_count = buffer.life_guard.add_ref(); + + let buffer_use = if !desc.mapped_at_creation { + hal::BufferUses::empty() + } else if desc.usage.contains(wgt::BufferUsages::MAP_WRITE) { + // buffer is mappable, so we are just doing that at start + let map_size = buffer.size; + let ptr = if map_size == 0 { + std::ptr::NonNull::dangling() + } else { + match map_buffer(&device.raw, &mut buffer, 0, map_size, HostMap::Write) { + Ok(ptr) => ptr, + Err(e) => { + let raw = buffer.raw.unwrap(); + device.lock_life(&mut token).schedule_resource_destruction( + queue::TempResource::Buffer(raw), + !0, + ); + break e.into(); + } + } + }; + buffer.map_state = resource::BufferMapState::Active { + ptr, + range: 0..map_size, + host: HostMap::Write, + }; + hal::BufferUses::MAP_WRITE + } else { + // buffer needs staging area for initialization only + let stage_desc = wgt::BufferDescriptor { + label: Some(Cow::Borrowed( + "(wgpu internal) initializing unmappable buffer", + )), + size: desc.size, + usage: wgt::BufferUsages::MAP_WRITE | wgt::BufferUsages::COPY_SRC, + mapped_at_creation: false, + }; + let mut stage = match 
device.create_buffer(device_id, &stage_desc, true) { + Ok(stage) => stage, + Err(e) => { + let raw = buffer.raw.unwrap(); + device + .lock_life(&mut token) + .schedule_resource_destruction(queue::TempResource::Buffer(raw), !0); + break e; + } + }; + let stage_buffer = stage.raw.unwrap(); + let mapping = match unsafe { device.raw.map_buffer(&stage_buffer, 0..stage.size) } { + Ok(mapping) => mapping, + Err(e) => { + let raw = buffer.raw.unwrap(); + let mut life_lock = device.lock_life(&mut token); + life_lock + .schedule_resource_destruction(queue::TempResource::Buffer(raw), !0); + life_lock.schedule_resource_destruction( + queue::TempResource::Buffer(stage_buffer), + !0, + ); + break DeviceError::from(e).into(); + } + }; + + assert_eq!(buffer.size % wgt::COPY_BUFFER_ALIGNMENT, 0); + // Zero initialize memory and then mark both staging and buffer as initialized + // (it's guaranteed that this is the case by the time the buffer is usable) + unsafe { ptr::write_bytes(mapping.ptr.as_ptr(), 0, buffer.size as usize) }; + buffer.initialization_status.drain(0..buffer.size); + stage.initialization_status.drain(0..buffer.size); + + buffer.map_state = resource::BufferMapState::Init { + ptr: mapping.ptr, + needs_flush: !mapping.is_coherent, + stage_buffer, + }; + hal::BufferUses::COPY_DST + }; + + let id = fid.assign(buffer, &mut token); + log::info!("Created buffer {:?} with {:?}", id, desc); + + device + .trackers + .lock() + .buffers + .insert_single(id, ref_count, buffer_use); + + return (id.0, None); + }; + + let id = fid.assign_error(desc.label.borrow_or_default(), &mut token); + (id, Some(error)) + } + + /// Assign `id_in` an error with the given `label`. + /// + /// Ensure that future attempts to use `id_in` as a buffer ID will propagate + /// the error, following the WebGPU ["contagious invalidity"] style. 
    ///
    /// Firefox uses this function to comply strictly with the WebGPU spec,
    /// which requires [`GPUBufferDescriptor`] validation errors to be generated
    /// on the Device timeline and leave the newly created [`GPUBuffer`] invalid.
    ///
    /// Ideally, we would simply let [`device_create_buffer`] take care of all
    /// of this, but some errors must be detected before we can even construct a
    /// [`wgpu_types::BufferDescriptor`] to give it. For example, the WebGPU API
    /// allows a `GPUBufferDescriptor`'s [`usage`] property to be any WebIDL
    /// `unsigned long` value, but we can't construct a
    /// [`wgpu_types::BufferUsages`] value from values with unassigned bits
    /// set. This means we must validate `usage` before we can call
    /// `device_create_buffer`.
    ///
    /// When that validation fails, we must arrange for the buffer id to be
    /// considered invalid. This method provides the means to do so.
    ///
    /// ["contagious invalidity"]: https://www.w3.org/TR/webgpu/#invalidity
    /// [`GPUBufferDescriptor`]: https://www.w3.org/TR/webgpu/#dictdef-gpubufferdescriptor
    /// [`GPUBuffer`]: https://www.w3.org/TR/webgpu/#gpubuffer
    /// [`wgpu_types::BufferDescriptor`]: wgt::BufferDescriptor
    /// [`device_create_buffer`]: Global::device_create_buffer
    /// [`usage`]: https://www.w3.org/TR/webgpu/#dom-gpubufferdescriptor-usage
    /// [`wgpu_types::BufferUsages`]: wgt::BufferUsages
    pub fn create_buffer_error<A: HalApi>(&self, id_in: Input<G, id::BufferId>, label: Label) {
        let hub = A::hub(self);
        let mut token = Token::root();
        let fid = hub.buffers.prepare(id_in);

        // The device registry is read-locked only to obtain the token that
        // `assign_error` takes; the guard itself is unused.
        let (_, mut token) = hub.devices.read(&mut token);
        fid.assign_error(label.borrow_or_default(), &mut token);
    }

    /// Assign `id_in` an error with the given `label`.
    ///
    /// See `create_buffer_error` for more context and explanation.
+ pub fn create_texture_error<A: HalApi>(&self, id_in: Input<G, id::TextureId>, label: Label) { + let hub = A::hub(self); + let mut token = Token::root(); + let fid = hub.textures.prepare(id_in); + + let (_, mut token) = hub.devices.read(&mut token); + fid.assign_error(label.borrow_or_default(), &mut token); + } + + #[cfg(feature = "replay")] + pub fn device_wait_for_buffer<A: HalApi>( + &self, + device_id: id::DeviceId, + buffer_id: id::BufferId, + ) -> Result<(), WaitIdleError> { + let hub = A::hub(self); + let mut token = Token::root(); + let (device_guard, mut token) = hub.devices.read(&mut token); + let last_submission = { + let (buffer_guard, _) = hub.buffers.write(&mut token); + match buffer_guard.get(buffer_id) { + Ok(buffer) => buffer.life_guard.life_count(), + Err(_) => return Ok(()), + } + }; + + device_guard + .get(device_id) + .map_err(|_| DeviceError::Invalid)? + .wait_for_submit(last_submission, &mut token) + } + + #[doc(hidden)] + pub fn device_set_buffer_sub_data<A: HalApi>( + &self, + device_id: id::DeviceId, + buffer_id: id::BufferId, + offset: BufferAddress, + data: &[u8], + ) -> BufferAccessResult { + profiling::scope!("Device::set_buffer_sub_data"); + + let hub = A::hub(self); + let mut token = Token::root(); + + let (device_guard, mut token) = hub.devices.read(&mut token); + let (mut buffer_guard, _) = hub.buffers.write(&mut token); + let device = device_guard + .get(device_id) + .map_err(|_| DeviceError::Invalid)?; + let buffer = buffer_guard + .get_mut(buffer_id) + .map_err(|_| BufferAccessError::Invalid)?; + check_buffer_usage(buffer.usage, wgt::BufferUsages::MAP_WRITE)?; + //assert!(buffer isn't used by the GPU); + + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + let mut trace = trace.lock(); + let data_path = trace.make_binary("bin", data); + trace.add(trace::Action::WriteBuffer { + id: buffer_id, + data: data_path, + range: offset..offset + data.len() as BufferAddress, + queued: false, + }); + } + + let raw_buf = 
buffer.raw.as_ref().unwrap(); + unsafe { + let mapping = device + .raw + .map_buffer(raw_buf, offset..offset + data.len() as u64) + .map_err(DeviceError::from)?; + ptr::copy_nonoverlapping(data.as_ptr(), mapping.ptr.as_ptr(), data.len()); + if !mapping.is_coherent { + device + .raw + .flush_mapped_ranges(raw_buf, iter::once(offset..offset + data.len() as u64)); + } + device + .raw + .unmap_buffer(raw_buf) + .map_err(DeviceError::from)?; + } + + Ok(()) + } + + #[doc(hidden)] + pub fn device_get_buffer_sub_data<A: HalApi>( + &self, + device_id: id::DeviceId, + buffer_id: id::BufferId, + offset: BufferAddress, + data: &mut [u8], + ) -> BufferAccessResult { + profiling::scope!("Device::get_buffer_sub_data"); + + let hub = A::hub(self); + let mut token = Token::root(); + + let (device_guard, mut token) = hub.devices.read(&mut token); + let (mut buffer_guard, _) = hub.buffers.write(&mut token); + let device = device_guard + .get(device_id) + .map_err(|_| DeviceError::Invalid)?; + let buffer = buffer_guard + .get_mut(buffer_id) + .map_err(|_| BufferAccessError::Invalid)?; + check_buffer_usage(buffer.usage, wgt::BufferUsages::MAP_READ)?; + //assert!(buffer isn't used by the GPU); + + let raw_buf = buffer.raw.as_ref().unwrap(); + unsafe { + let mapping = device + .raw + .map_buffer(raw_buf, offset..offset + data.len() as u64) + .map_err(DeviceError::from)?; + if !mapping.is_coherent { + device.raw.invalidate_mapped_ranges( + raw_buf, + iter::once(offset..offset + data.len() as u64), + ); + } + ptr::copy_nonoverlapping(mapping.ptr.as_ptr(), data.as_mut_ptr(), data.len()); + device + .raw + .unmap_buffer(raw_buf) + .map_err(DeviceError::from)?; + } + + Ok(()) + } + + pub fn buffer_label<A: HalApi>(&self, id: id::BufferId) -> String { + A::hub(self).buffers.label_for_resource(id) + } + + pub fn buffer_destroy<A: HalApi>( + &self, + buffer_id: id::BufferId, + ) -> Result<(), resource::DestroyError> { + profiling::scope!("Buffer::destroy"); + + let map_closure; + // Restrict the 
locks to this scope. + { + let hub = A::hub(self); + let mut token = Token::root(); + + //TODO: lock pending writes separately, keep the device read-only + let (mut device_guard, mut token) = hub.devices.write(&mut token); + + log::info!("Buffer {:?} is destroyed", buffer_id); + let (mut buffer_guard, _) = hub.buffers.write(&mut token); + let buffer = buffer_guard + .get_mut(buffer_id) + .map_err(|_| resource::DestroyError::Invalid)?; + + let device = &mut device_guard[buffer.device_id.value]; + + map_closure = match &buffer.map_state { + &BufferMapState::Waiting(..) // To get the proper callback behavior. + | &BufferMapState::Init { .. } + | &BufferMapState::Active { .. } + => { + self.buffer_unmap_inner(buffer_id, buffer, device) + .unwrap_or(None) + } + _ => None, + }; + + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + trace.lock().add(trace::Action::FreeBuffer(buffer_id)); + } + + let raw = buffer + .raw + .take() + .ok_or(resource::DestroyError::AlreadyDestroyed)?; + let temp = queue::TempResource::Buffer(raw); + + if device.pending_writes.dst_buffers.contains(&buffer_id) { + device.pending_writes.temp_resources.push(temp); + } else { + let last_submit_index = buffer.life_guard.life_count(); + drop(buffer_guard); + device + .lock_life(&mut token) + .schedule_resource_destruction(temp, last_submit_index); + } + } + + // Note: outside the scope where locks are held when calling the callback + if let Some((operation, status)) = map_closure { + operation.callback.call(status); + } + + Ok(()) + } + + pub fn buffer_drop<A: HalApi>(&self, buffer_id: id::BufferId, wait: bool) { + profiling::scope!("Buffer::drop"); + log::debug!("buffer {:?} is dropped", buffer_id); + + let hub = A::hub(self); + let mut token = Token::root(); + + let (ref_count, last_submit_index, device_id) = { + let (mut buffer_guard, _) = hub.buffers.write(&mut token); + match buffer_guard.get_mut(buffer_id) { + Ok(buffer) => { + let ref_count = 
buffer.life_guard.ref_count.take().unwrap(); + let last_submit_index = buffer.life_guard.life_count(); + (ref_count, last_submit_index, buffer.device_id.value) + } + Err(InvalidId) => { + hub.buffers.unregister_locked(buffer_id, &mut *buffer_guard); + return; + } + } + }; + + let (device_guard, mut token) = hub.devices.read(&mut token); + let device = &device_guard[device_id]; + { + let mut life_lock = device.lock_life(&mut token); + if device.pending_writes.dst_buffers.contains(&buffer_id) { + life_lock.future_suspected_buffers.push(Stored { + value: id::Valid(buffer_id), + ref_count, + }); + } else { + drop(ref_count); + life_lock + .suspected_resources + .buffers + .push(id::Valid(buffer_id)); + } + } + + if wait { + match device.wait_for_submit(last_submit_index, &mut token) { + Ok(()) => (), + Err(e) => log::error!("Failed to wait for buffer {:?}: {:?}", buffer_id, e), + } + } + } + + pub fn device_create_texture<A: HalApi>( + &self, + device_id: id::DeviceId, + desc: &resource::TextureDescriptor, + id_in: Input<G, id::TextureId>, + ) -> (id::TextureId, Option<resource::CreateTextureError>) { + profiling::scope!("Device::create_texture"); + + let hub = A::hub(self); + let mut token = Token::root(); + let fid = hub.textures.prepare(id_in); + + let (adapter_guard, mut token) = hub.adapters.read(&mut token); + let (device_guard, mut token) = hub.devices.read(&mut token); + let error = loop { + let device = match device_guard.get(device_id) { + Ok(device) => device, + Err(_) => break DeviceError::Invalid.into(), + }; + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + trace + .lock() + .add(trace::Action::CreateTexture(fid.id(), desc.clone())); + } + + let adapter = &adapter_guard[device.adapter_id.value]; + let texture = match device.create_texture(device_id, adapter, desc) { + Ok(texture) => texture, + Err(error) => break error, + }; + let ref_count = texture.life_guard.add_ref(); + + let id = fid.assign(texture, &mut token); + 
log::info!("Created texture {:?} with {:?}", id, desc); + + device.trackers.lock().textures.insert_single( + id.0, + ref_count, + hal::TextureUses::UNINITIALIZED, + ); + + return (id.0, None); + }; + + let id = fid.assign_error(desc.label.borrow_or_default(), &mut token); + (id, Some(error)) + } + + /// # Safety + /// + /// - `hal_texture` must be created from `device_id` corresponding raw handle. + /// - `hal_texture` must be created respecting `desc` + /// - `hal_texture` must be initialized + pub unsafe fn create_texture_from_hal<A: HalApi>( + &self, + hal_texture: A::Texture, + device_id: id::DeviceId, + desc: &resource::TextureDescriptor, + id_in: Input<G, id::TextureId>, + ) -> (id::TextureId, Option<resource::CreateTextureError>) { + profiling::scope!("Device::create_texture"); + + let hub = A::hub(self); + let mut token = Token::root(); + let fid = hub.textures.prepare(id_in); + + let (adapter_guard, mut token) = hub.adapters.read(&mut token); + let (device_guard, mut token) = hub.devices.read(&mut token); + let error = loop { + let device = match device_guard.get(device_id) { + Ok(device) => device, + Err(_) => break DeviceError::Invalid.into(), + }; + + // NB: Any change done through the raw texture handle will not be + // recorded in the replay + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + trace + .lock() + .add(trace::Action::CreateTexture(fid.id(), desc.clone())); + } + + let adapter = &adapter_guard[device.adapter_id.value]; + + let format_features = match device + .describe_format_features(adapter, desc.format) + .map_err(|error| resource::CreateTextureError::MissingFeatures(desc.format, error)) + { + Ok(features) => features, + Err(error) => break error, + }; + + let mut texture = device.create_texture_from_hal( + hal_texture, + conv::map_texture_usage(desc.usage, desc.format.into()), + device_id, + desc, + format_features, + resource::TextureClearMode::None, + ); + if desc.usage.contains(wgt::TextureUsages::COPY_DST) { + 
texture.hal_usage |= hal::TextureUses::COPY_DST; + } + + texture.initialization_status = TextureInitTracker::new(desc.mip_level_count, 0); + + let ref_count = texture.life_guard.add_ref(); + + let id = fid.assign(texture, &mut token); + log::info!("Created texture {:?} with {:?}", id, desc); + + device.trackers.lock().textures.insert_single( + id.0, + ref_count, + hal::TextureUses::UNINITIALIZED, + ); + + return (id.0, None); + }; + + let id = fid.assign_error(desc.label.borrow_or_default(), &mut token); + (id, Some(error)) + } + + pub fn texture_label<A: HalApi>(&self, id: id::TextureId) -> String { + A::hub(self).textures.label_for_resource(id) + } + + pub fn texture_destroy<A: HalApi>( + &self, + texture_id: id::TextureId, + ) -> Result<(), resource::DestroyError> { + profiling::scope!("Texture::destroy"); + + let hub = A::hub(self); + let mut token = Token::root(); + + //TODO: lock pending writes separately, keep the device read-only + let (mut device_guard, mut token) = hub.devices.write(&mut token); + + log::info!("Buffer {:?} is destroyed", texture_id); + let (mut texture_guard, _) = hub.textures.write(&mut token); + let texture = texture_guard + .get_mut(texture_id) + .map_err(|_| resource::DestroyError::Invalid)?; + + let device = &mut device_guard[texture.device_id.value]; + + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + trace.lock().add(trace::Action::FreeTexture(texture_id)); + } + + let last_submit_index = texture.life_guard.life_count(); + + let clear_views = + match std::mem::replace(&mut texture.clear_mode, resource::TextureClearMode::None) { + resource::TextureClearMode::BufferCopy => SmallVec::new(), + resource::TextureClearMode::RenderPass { clear_views, .. 
} => clear_views, + resource::TextureClearMode::None => SmallVec::new(), + }; + + match texture.inner { + resource::TextureInner::Native { ref mut raw } => { + let raw = raw.take().ok_or(resource::DestroyError::AlreadyDestroyed)?; + let temp = queue::TempResource::Texture(raw, clear_views); + + if device.pending_writes.dst_textures.contains(&texture_id) { + device.pending_writes.temp_resources.push(temp); + } else { + drop(texture_guard); + device + .lock_life(&mut token) + .schedule_resource_destruction(temp, last_submit_index); + } + } + resource::TextureInner::Surface { .. } => { + for clear_view in clear_views { + unsafe { + device.raw.destroy_texture_view(clear_view); + } + } + // TODO? + } + } + + Ok(()) + } + + pub fn texture_drop<A: HalApi>(&self, texture_id: id::TextureId, wait: bool) { + profiling::scope!("Texture::drop"); + log::debug!("texture {:?} is dropped", texture_id); + + let hub = A::hub(self); + let mut token = Token::root(); + + let (ref_count, last_submit_index, device_id) = { + let (mut texture_guard, _) = hub.textures.write(&mut token); + match texture_guard.get_mut(texture_id) { + Ok(texture) => { + let ref_count = texture.life_guard.ref_count.take().unwrap(); + let last_submit_index = texture.life_guard.life_count(); + (ref_count, last_submit_index, texture.device_id.value) + } + Err(InvalidId) => { + hub.textures + .unregister_locked(texture_id, &mut *texture_guard); + return; + } + } + }; + + let (device_guard, mut token) = hub.devices.read(&mut token); + let device = &device_guard[device_id]; + { + let mut life_lock = device.lock_life(&mut token); + if device.pending_writes.dst_textures.contains(&texture_id) { + life_lock.future_suspected_textures.push(Stored { + value: id::Valid(texture_id), + ref_count, + }); + } else { + drop(ref_count); + life_lock + .suspected_resources + .textures + .push(id::Valid(texture_id)); + } + } + + if wait { + match device.wait_for_submit(last_submit_index, &mut token) { + Ok(()) => (), + Err(e) => 
log::error!("Failed to wait for texture {:?}: {:?}", texture_id, e), + } + } + } + + pub fn texture_create_view<A: HalApi>( + &self, + texture_id: id::TextureId, + desc: &resource::TextureViewDescriptor, + id_in: Input<G, id::TextureViewId>, + ) -> (id::TextureViewId, Option<resource::CreateTextureViewError>) { + profiling::scope!("Texture::create_view"); + + let hub = A::hub(self); + let mut token = Token::root(); + let fid = hub.texture_views.prepare(id_in); + + let (device_guard, mut token) = hub.devices.read(&mut token); + let (texture_guard, mut token) = hub.textures.read(&mut token); + let error = loop { + let texture = match texture_guard.get(texture_id) { + Ok(texture) => texture, + Err(_) => break resource::CreateTextureViewError::InvalidTexture, + }; + let device = &device_guard[texture.device_id.value]; + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + trace.lock().add(trace::Action::CreateTextureView { + id: fid.id(), + parent_id: texture_id, + desc: desc.clone(), + }); + } + + let view = match device.create_texture_view(texture, texture_id, desc) { + Ok(view) => view, + Err(e) => break e, + }; + let ref_count = view.life_guard.add_ref(); + let id = fid.assign(view, &mut token); + + device.trackers.lock().views.insert_single(id, ref_count); + return (id.0, None); + }; + + let id = fid.assign_error(desc.label.borrow_or_default(), &mut token); + (id, Some(error)) + } + + pub fn texture_view_label<A: HalApi>(&self, id: id::TextureViewId) -> String { + A::hub(self).texture_views.label_for_resource(id) + } + + pub fn texture_view_drop<A: HalApi>( + &self, + texture_view_id: id::TextureViewId, + wait: bool, + ) -> Result<(), resource::TextureViewDestroyError> { + profiling::scope!("TextureView::drop"); + log::debug!("texture view {:?} is dropped", texture_view_id); + + let hub = A::hub(self); + let mut token = Token::root(); + + let (last_submit_index, device_id) = { + let (mut texture_view_guard, _) = hub.texture_views.write(&mut 
token); + + match texture_view_guard.get_mut(texture_view_id) { + Ok(view) => { + let _ref_count = view.life_guard.ref_count.take(); + let last_submit_index = view.life_guard.life_count(); + (last_submit_index, view.device_id.value) + } + Err(InvalidId) => { + hub.texture_views + .unregister_locked(texture_view_id, &mut *texture_view_guard); + return Ok(()); + } + } + }; + + let (device_guard, mut token) = hub.devices.read(&mut token); + let device = &device_guard[device_id]; + device + .lock_life(&mut token) + .suspected_resources + .texture_views + .push(id::Valid(texture_view_id)); + + if wait { + match device.wait_for_submit(last_submit_index, &mut token) { + Ok(()) => (), + Err(e) => log::error!( + "Failed to wait for texture view {:?}: {:?}", + texture_view_id, + e + ), + } + } + Ok(()) + } + + pub fn device_create_sampler<A: HalApi>( + &self, + device_id: id::DeviceId, + desc: &resource::SamplerDescriptor, + id_in: Input<G, id::SamplerId>, + ) -> (id::SamplerId, Option<resource::CreateSamplerError>) { + profiling::scope!("Device::create_sampler"); + + let hub = A::hub(self); + let mut token = Token::root(); + let fid = hub.samplers.prepare(id_in); + + let (device_guard, mut token) = hub.devices.read(&mut token); + let error = loop { + let device = match device_guard.get(device_id) { + Ok(device) => device, + Err(_) => break DeviceError::Invalid.into(), + }; + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + trace + .lock() + .add(trace::Action::CreateSampler(fid.id(), desc.clone())); + } + + let sampler = match device.create_sampler(device_id, desc) { + Ok(sampler) => sampler, + Err(e) => break e, + }; + let ref_count = sampler.life_guard.add_ref(); + let id = fid.assign(sampler, &mut token); + + device.trackers.lock().samplers.insert_single(id, ref_count); + + return (id.0, None); + }; + + let id = fid.assign_error(desc.label.borrow_or_default(), &mut token); + (id, Some(error)) + } + + pub fn sampler_label<A: HalApi>(&self, id: 
id::SamplerId) -> String { + A::hub(self).samplers.label_for_resource(id) + } + + pub fn sampler_drop<A: HalApi>(&self, sampler_id: id::SamplerId) { + profiling::scope!("Sampler::drop"); + log::debug!("sampler {:?} is dropped", sampler_id); + + let hub = A::hub(self); + let mut token = Token::root(); + + let device_id = { + let (mut sampler_guard, _) = hub.samplers.write(&mut token); + match sampler_guard.get_mut(sampler_id) { + Ok(sampler) => { + sampler.life_guard.ref_count.take(); + sampler.device_id.value + } + Err(InvalidId) => { + hub.samplers + .unregister_locked(sampler_id, &mut *sampler_guard); + return; + } + } + }; + + let (device_guard, mut token) = hub.devices.read(&mut token); + device_guard[device_id] + .lock_life(&mut token) + .suspected_resources + .samplers + .push(id::Valid(sampler_id)); + } + + pub fn device_create_bind_group_layout<A: HalApi>( + &self, + device_id: id::DeviceId, + desc: &binding_model::BindGroupLayoutDescriptor, + id_in: Input<G, id::BindGroupLayoutId>, + ) -> ( + id::BindGroupLayoutId, + Option<binding_model::CreateBindGroupLayoutError>, + ) { + profiling::scope!("Device::create_bind_group_layout"); + + let mut token = Token::root(); + let hub = A::hub(self); + let fid = hub.bind_group_layouts.prepare(id_in); + + let error = 'outer: loop { + let (device_guard, mut token) = hub.devices.read(&mut token); + let device = match device_guard.get(device_id) { + Ok(device) => device, + Err(_) => break DeviceError::Invalid.into(), + }; + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + trace + .lock() + .add(trace::Action::CreateBindGroupLayout(fid.id(), desc.clone())); + } + + let mut entry_map = FastHashMap::default(); + for entry in desc.entries.iter() { + if entry.binding > device.limits.max_bindings_per_bind_group { + break 'outer binding_model::CreateBindGroupLayoutError::InvalidBindingIndex { + binding: entry.binding, + maximum: device.limits.max_bindings_per_bind_group, + }; + } + if 
entry_map.insert(entry.binding, *entry).is_some() { + break 'outer binding_model::CreateBindGroupLayoutError::ConflictBinding( + entry.binding, + ); + } + } + + // If there is an equivalent BGL, just bump the refcount and return it. + // This is only applicable for identity filters that are generating new IDs, + // so their inputs are `PhantomData` of size 0. + if mem::size_of::<Input<G, id::BindGroupLayoutId>>() == 0 { + let (bgl_guard, _) = hub.bind_group_layouts.read(&mut token); + if let Some(id) = + Device::deduplicate_bind_group_layout(device_id, &entry_map, &*bgl_guard) + { + return (id, None); + } + } + + let layout = match device.create_bind_group_layout( + device_id, + desc.label.borrow_option(), + entry_map, + ) { + Ok(layout) => layout, + Err(e) => break e, + }; + + let id = fid.assign(layout, &mut token); + return (id.0, None); + }; + + let id = fid.assign_error(desc.label.borrow_or_default(), &mut token); + (id, Some(error)) + } + + pub fn bind_group_layout_label<A: HalApi>(&self, id: id::BindGroupLayoutId) -> String { + A::hub(self).bind_group_layouts.label_for_resource(id) + } + + pub fn bind_group_layout_drop<A: HalApi>(&self, bind_group_layout_id: id::BindGroupLayoutId) { + profiling::scope!("BindGroupLayout::drop"); + log::debug!("bind group layout {:?} is dropped", bind_group_layout_id); + + let hub = A::hub(self); + let mut token = Token::root(); + let device_id = { + let (mut bind_group_layout_guard, _) = hub.bind_group_layouts.write(&mut token); + match bind_group_layout_guard.get_mut(bind_group_layout_id) { + Ok(layout) => layout.device_id.value, + Err(InvalidId) => { + hub.bind_group_layouts + .unregister_locked(bind_group_layout_id, &mut *bind_group_layout_guard); + return; + } + } + }; + + let (device_guard, mut token) = hub.devices.read(&mut token); + device_guard[device_id] + .lock_life(&mut token) + .suspected_resources + .bind_group_layouts + .push(id::Valid(bind_group_layout_id)); + } + + pub fn device_create_pipeline_layout<A: 
HalApi>( + &self, + device_id: id::DeviceId, + desc: &binding_model::PipelineLayoutDescriptor, + id_in: Input<G, id::PipelineLayoutId>, + ) -> ( + id::PipelineLayoutId, + Option<binding_model::CreatePipelineLayoutError>, + ) { + profiling::scope!("Device::create_pipeline_layout"); + + let hub = A::hub(self); + let mut token = Token::root(); + let fid = hub.pipeline_layouts.prepare(id_in); + + let (device_guard, mut token) = hub.devices.read(&mut token); + let error = loop { + let device = match device_guard.get(device_id) { + Ok(device) => device, + Err(_) => break DeviceError::Invalid.into(), + }; + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + trace + .lock() + .add(trace::Action::CreatePipelineLayout(fid.id(), desc.clone())); + } + + let layout = { + let (bgl_guard, _) = hub.bind_group_layouts.read(&mut token); + match device.create_pipeline_layout(device_id, desc, &*bgl_guard) { + Ok(layout) => layout, + Err(e) => break e, + } + }; + + let id = fid.assign(layout, &mut token); + return (id.0, None); + }; + + let id = fid.assign_error(desc.label.borrow_or_default(), &mut token); + (id, Some(error)) + } + + pub fn pipeline_layout_label<A: HalApi>(&self, id: id::PipelineLayoutId) -> String { + A::hub(self).pipeline_layouts.label_for_resource(id) + } + + pub fn pipeline_layout_drop<A: HalApi>(&self, pipeline_layout_id: id::PipelineLayoutId) { + profiling::scope!("PipelineLayout::drop"); + log::debug!("pipeline layout {:?} is dropped", pipeline_layout_id); + + let hub = A::hub(self); + let mut token = Token::root(); + let (device_id, ref_count) = { + let (mut pipeline_layout_guard, _) = hub.pipeline_layouts.write(&mut token); + match pipeline_layout_guard.get_mut(pipeline_layout_id) { + Ok(layout) => ( + layout.device_id.value, + layout.life_guard.ref_count.take().unwrap(), + ), + Err(InvalidId) => { + hub.pipeline_layouts + .unregister_locked(pipeline_layout_id, &mut *pipeline_layout_guard); + return; + } + } + }; + + let (device_guard, mut 
token) = hub.devices.read(&mut token); + device_guard[device_id] + .lock_life(&mut token) + .suspected_resources + .pipeline_layouts + .push(Stored { + value: id::Valid(pipeline_layout_id), + ref_count, + }); + } + + pub fn device_create_bind_group<A: HalApi>( + &self, + device_id: id::DeviceId, + desc: &binding_model::BindGroupDescriptor, + id_in: Input<G, id::BindGroupId>, + ) -> (id::BindGroupId, Option<binding_model::CreateBindGroupError>) { + profiling::scope!("Device::create_bind_group"); + + let hub = A::hub(self); + let mut token = Token::root(); + let fid = hub.bind_groups.prepare(id_in); + + let (device_guard, mut token) = hub.devices.read(&mut token); + let (bind_group_layout_guard, mut token) = hub.bind_group_layouts.read(&mut token); + + let error = loop { + let device = match device_guard.get(device_id) { + Ok(device) => device, + Err(_) => break DeviceError::Invalid.into(), + }; + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + trace + .lock() + .add(trace::Action::CreateBindGroup(fid.id(), desc.clone())); + } + + let bind_group_layout = match bind_group_layout_guard.get(desc.layout) { + Ok(layout) => layout, + Err(_) => break binding_model::CreateBindGroupError::InvalidLayout, + }; + let bind_group = + match device.create_bind_group(device_id, bind_group_layout, desc, hub, &mut token) + { + Ok(bind_group) => bind_group, + Err(e) => break e, + }; + let ref_count = bind_group.life_guard.add_ref(); + + let id = fid.assign(bind_group, &mut token); + log::debug!("Bind group {:?}", id,); + + device + .trackers + .lock() + .bind_groups + .insert_single(id, ref_count); + return (id.0, None); + }; + + let id = fid.assign_error(desc.label.borrow_or_default(), &mut token); + (id, Some(error)) + } + + pub fn bind_group_label<A: HalApi>(&self, id: id::BindGroupId) -> String { + A::hub(self).bind_groups.label_for_resource(id) + } + + pub fn bind_group_drop<A: HalApi>(&self, bind_group_id: id::BindGroupId) { + 
profiling::scope!("BindGroup::drop"); + log::debug!("bind group {:?} is dropped", bind_group_id); + + let hub = A::hub(self); + let mut token = Token::root(); + + let device_id = { + let (mut bind_group_guard, _) = hub.bind_groups.write(&mut token); + match bind_group_guard.get_mut(bind_group_id) { + Ok(bind_group) => { + bind_group.life_guard.ref_count.take(); + bind_group.device_id.value + } + Err(InvalidId) => { + hub.bind_groups + .unregister_locked(bind_group_id, &mut *bind_group_guard); + return; + } + } + }; + + let (device_guard, mut token) = hub.devices.read(&mut token); + device_guard[device_id] + .lock_life(&mut token) + .suspected_resources + .bind_groups + .push(id::Valid(bind_group_id)); + } + + pub fn device_create_shader_module<A: HalApi>( + &self, + device_id: id::DeviceId, + desc: &pipeline::ShaderModuleDescriptor, + source: pipeline::ShaderModuleSource, + id_in: Input<G, id::ShaderModuleId>, + ) -> ( + id::ShaderModuleId, + Option<pipeline::CreateShaderModuleError>, + ) { + profiling::scope!("Device::create_shader_module"); + + let hub = A::hub(self); + let mut token = Token::root(); + let fid = hub.shader_modules.prepare(id_in); + + let (device_guard, mut token) = hub.devices.read(&mut token); + let error = loop { + let device = match device_guard.get(device_id) { + Ok(device) => device, + Err(_) => break DeviceError::Invalid.into(), + }; + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + let mut trace = trace.lock(); + let data = match source { + #[cfg(feature = "wgsl")] + pipeline::ShaderModuleSource::Wgsl(ref code) => { + trace.make_binary("wgsl", code.as_bytes()) + } + pipeline::ShaderModuleSource::Naga(ref module) => { + let string = + ron::ser::to_string_pretty(module, ron::ser::PrettyConfig::default()) + .unwrap(); + trace.make_binary("ron", string.as_bytes()) + } + pipeline::ShaderModuleSource::Dummy(_) => { + panic!("found `ShaderModuleSource::Dummy`") + } + }; + trace.add(trace::Action::CreateShaderModule { + id: 
fid.id(), + desc: desc.clone(), + data, + }); + }; + + let shader = match device.create_shader_module(device_id, desc, source) { + Ok(shader) => shader, + Err(e) => break e, + }; + let id = fid.assign(shader, &mut token); + return (id.0, None); + }; + + let id = fid.assign_error(desc.label.borrow_or_default(), &mut token); + (id, Some(error)) + } + + // Unsafe-ness of internal calls has little to do with unsafe-ness of this. + #[allow(unused_unsafe)] + /// # Safety + /// + /// This function passes SPIR-V binary to the backend as-is and can potentially result in a + /// driver crash. + pub unsafe fn device_create_shader_module_spirv<A: HalApi>( + &self, + device_id: id::DeviceId, + desc: &pipeline::ShaderModuleDescriptor, + source: Cow<[u32]>, + id_in: Input<G, id::ShaderModuleId>, + ) -> ( + id::ShaderModuleId, + Option<pipeline::CreateShaderModuleError>, + ) { + profiling::scope!("Device::create_shader_module"); + + let hub = A::hub(self); + let mut token = Token::root(); + let fid = hub.shader_modules.prepare(id_in); + + let (device_guard, mut token) = hub.devices.read(&mut token); + let error = loop { + let device = match device_guard.get(device_id) { + Ok(device) => device, + Err(_) => break DeviceError::Invalid.into(), + }; + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + let mut trace = trace.lock(); + let data = trace.make_binary("spv", unsafe { + std::slice::from_raw_parts(source.as_ptr() as *const u8, source.len() * 4) + }); + trace.add(trace::Action::CreateShaderModule { + id: fid.id(), + desc: desc.clone(), + data, + }); + }; + + let shader = + match unsafe { device.create_shader_module_spirv(device_id, desc, &source) } { + Ok(shader) => shader, + Err(e) => break e, + }; + let id = fid.assign(shader, &mut token); + return (id.0, None); + }; + + let id = fid.assign_error(desc.label.borrow_or_default(), &mut token); + (id, Some(error)) + } + + pub fn shader_module_label<A: HalApi>(&self, id: id::ShaderModuleId) -> String { + 
A::hub(self).shader_modules.label_for_resource(id) + } + + pub fn shader_module_drop<A: HalApi>(&self, shader_module_id: id::ShaderModuleId) { + profiling::scope!("ShaderModule::drop"); + log::debug!("shader module {:?} is dropped", shader_module_id); + + let hub = A::hub(self); + let mut token = Token::root(); + let (device_guard, mut token) = hub.devices.read(&mut token); + let (module, _) = hub.shader_modules.unregister(shader_module_id, &mut token); + if let Some(module) = module { + let device = &device_guard[module.device_id.value]; + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + trace + .lock() + .add(trace::Action::DestroyShaderModule(shader_module_id)); + } + unsafe { + device.raw.destroy_shader_module(module.raw); + } + } + } + + pub fn device_create_command_encoder<A: HalApi>( + &self, + device_id: id::DeviceId, + desc: &wgt::CommandEncoderDescriptor<Label>, + id_in: Input<G, id::CommandEncoderId>, + ) -> (id::CommandEncoderId, Option<DeviceError>) { + profiling::scope!("Device::create_command_encoder"); + + let hub = A::hub(self); + let mut token = Token::root(); + let fid = hub.command_buffers.prepare(id_in); + + let (device_guard, mut token) = hub.devices.read(&mut token); + let error = loop { + let device = match device_guard.get(device_id) { + Ok(device) => device, + Err(_) => break DeviceError::Invalid, + }; + let dev_stored = Stored { + value: id::Valid(device_id), + ref_count: device.life_guard.add_ref(), + }; + let encoder = match device + .command_allocator + .lock() + .acquire_encoder(&device.raw, &device.queue) + { + Ok(raw) => raw, + Err(_) => break DeviceError::OutOfMemory, + }; + let command_buffer = command::CommandBuffer::new( + encoder, + dev_stored, + device.limits.clone(), + device.downlevel.clone(), + device.features, + #[cfg(feature = "trace")] + device.trace.is_some(), + &desc.label, + ); + + let id = fid.assign(command_buffer, &mut token); + return (id.0, None); + }; + + let id = 
fid.assign_error(desc.label.borrow_or_default(), &mut token); + (id, Some(error)) + } + + pub fn command_buffer_label<A: HalApi>(&self, id: id::CommandBufferId) -> String { + A::hub(self).command_buffers.label_for_resource(id) + } + + pub fn command_encoder_drop<A: HalApi>(&self, command_encoder_id: id::CommandEncoderId) { + profiling::scope!("CommandEncoder::drop"); + log::debug!("command encoder {:?} is dropped", command_encoder_id); + + let hub = A::hub(self); + let mut token = Token::root(); + + let (mut device_guard, mut token) = hub.devices.write(&mut token); + let (cmdbuf, _) = hub + .command_buffers + .unregister(command_encoder_id, &mut token); + if let Some(cmdbuf) = cmdbuf { + let device = &mut device_guard[cmdbuf.device_id.value]; + device.untrack::<G>(hub, &cmdbuf.trackers, &mut token); + device.destroy_command_buffer(cmdbuf); + } + } + + pub fn command_buffer_drop<A: HalApi>(&self, command_buffer_id: id::CommandBufferId) { + profiling::scope!("CommandBuffer::drop"); + log::debug!("command buffer {:?} is dropped", command_buffer_id); + self.command_encoder_drop::<A>(command_buffer_id) + } + + pub fn device_create_render_bundle_encoder( + &self, + device_id: id::DeviceId, + desc: &command::RenderBundleEncoderDescriptor, + ) -> ( + id::RenderBundleEncoderId, + Option<command::CreateRenderBundleError>, + ) { + profiling::scope!("Device::create_render_bundle_encoder"); + let (encoder, error) = match command::RenderBundleEncoder::new(desc, device_id, None) { + Ok(encoder) => (encoder, None), + Err(e) => (command::RenderBundleEncoder::dummy(device_id), Some(e)), + }; + (Box::into_raw(Box::new(encoder)), error) + } + + pub fn render_bundle_encoder_finish<A: HalApi>( + &self, + bundle_encoder: command::RenderBundleEncoder, + desc: &command::RenderBundleDescriptor, + id_in: Input<G, id::RenderBundleId>, + ) -> (id::RenderBundleId, Option<command::RenderBundleError>) { + profiling::scope!("RenderBundleEncoder::finish"); + + let hub = A::hub(self); + let mut 
token = Token::root(); + let fid = hub.render_bundles.prepare(id_in); + + let (device_guard, mut token) = hub.devices.read(&mut token); + let error = loop { + let device = match device_guard.get(bundle_encoder.parent()) { + Ok(device) => device, + Err(_) => break command::RenderBundleError::INVALID_DEVICE, + }; + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + trace.lock().add(trace::Action::CreateRenderBundle { + id: fid.id(), + desc: trace::new_render_bundle_encoder_descriptor( + desc.label.clone(), + &bundle_encoder.context, + bundle_encoder.is_depth_read_only, + bundle_encoder.is_stencil_read_only, + ), + base: bundle_encoder.to_base_pass(), + }); + } + + let render_bundle = match bundle_encoder.finish(desc, device, hub, &mut token) { + Ok(bundle) => bundle, + Err(e) => break e, + }; + + log::debug!("Render bundle"); + let ref_count = render_bundle.life_guard.add_ref(); + let id = fid.assign(render_bundle, &mut token); + + device.trackers.lock().bundles.insert_single(id, ref_count); + return (id.0, None); + }; + + let id = fid.assign_error(desc.label.borrow_or_default(), &mut token); + (id, Some(error)) + } + + pub fn render_bundle_label<A: HalApi>(&self, id: id::RenderBundleId) -> String { + A::hub(self).render_bundles.label_for_resource(id) + } + + pub fn render_bundle_drop<A: HalApi>(&self, render_bundle_id: id::RenderBundleId) { + profiling::scope!("RenderBundle::drop"); + log::debug!("render bundle {:?} is dropped", render_bundle_id); + let hub = A::hub(self); + let mut token = Token::root(); + + let (device_guard, mut token) = hub.devices.read(&mut token); + let device_id = { + let (mut bundle_guard, _) = hub.render_bundles.write(&mut token); + match bundle_guard.get_mut(render_bundle_id) { + Ok(bundle) => { + bundle.life_guard.ref_count.take(); + bundle.device_id.value + } + Err(InvalidId) => { + hub.render_bundles + .unregister_locked(render_bundle_id, &mut *bundle_guard); + return; + } + } + }; + + device_guard[device_id] + 
.lock_life(&mut token) + .suspected_resources + .render_bundles + .push(id::Valid(render_bundle_id)); + } + + pub fn device_create_query_set<A: HalApi>( + &self, + device_id: id::DeviceId, + desc: &resource::QuerySetDescriptor, + id_in: Input<G, id::QuerySetId>, + ) -> (id::QuerySetId, Option<resource::CreateQuerySetError>) { + profiling::scope!("Device::create_query_set"); + + let hub = A::hub(self); + let mut token = Token::root(); + let fid = hub.query_sets.prepare(id_in); + + let (device_guard, mut token) = hub.devices.read(&mut token); + let error = loop { + let device = match device_guard.get(device_id) { + Ok(device) => device, + Err(_) => break DeviceError::Invalid.into(), + }; + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + trace.lock().add(trace::Action::CreateQuerySet { + id: fid.id(), + desc: desc.clone(), + }); + } + + let query_set = match device.create_query_set(device_id, desc) { + Ok(query_set) => query_set, + Err(err) => break err, + }; + + let ref_count = query_set.life_guard.add_ref(); + let id = fid.assign(query_set, &mut token); + + device + .trackers + .lock() + .query_sets + .insert_single(id, ref_count); + + return (id.0, None); + }; + + let id = fid.assign_error("", &mut token); + (id, Some(error)) + } + + pub fn query_set_drop<A: HalApi>(&self, query_set_id: id::QuerySetId) { + profiling::scope!("QuerySet::drop"); + log::debug!("query set {:?} is dropped", query_set_id); + + let hub = A::hub(self); + let mut token = Token::root(); + + let device_id = { + let (mut query_set_guard, _) = hub.query_sets.write(&mut token); + let query_set = query_set_guard.get_mut(query_set_id).unwrap(); + query_set.life_guard.ref_count.take(); + query_set.device_id.value + }; + + let (device_guard, mut token) = hub.devices.read(&mut token); + let device = &device_guard[device_id]; + + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + trace + .lock() + .add(trace::Action::DestroyQuerySet(query_set_id)); + } + + 
device + .lock_life(&mut token) + .suspected_resources + .query_sets + .push(id::Valid(query_set_id)); + } + + pub fn query_set_label<A: HalApi>(&self, id: id::QuerySetId) -> String { + A::hub(self).query_sets.label_for_resource(id) + } + + pub fn device_create_render_pipeline<A: HalApi>( + &self, + device_id: id::DeviceId, + desc: &pipeline::RenderPipelineDescriptor, + id_in: Input<G, id::RenderPipelineId>, + implicit_pipeline_ids: Option<ImplicitPipelineIds<G>>, + ) -> ( + id::RenderPipelineId, + Option<pipeline::CreateRenderPipelineError>, + ) { + profiling::scope!("Device::create_render_pipeline"); + + let hub = A::hub(self); + let mut token = Token::root(); + + let fid = hub.render_pipelines.prepare(id_in); + let implicit_context = implicit_pipeline_ids.map(|ipi| ipi.prepare(hub)); + + let (adapter_guard, mut token) = hub.adapters.read(&mut token); + let (device_guard, mut token) = hub.devices.read(&mut token); + let error = loop { + let device = match device_guard.get(device_id) { + Ok(device) => device, + Err(_) => break DeviceError::Invalid.into(), + }; + let adapter = &adapter_guard[device.adapter_id.value]; + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + trace.lock().add(trace::Action::CreateRenderPipeline { + id: fid.id(), + desc: desc.clone(), + implicit_context: implicit_context.clone(), + }); + } + + let pipeline = match device.create_render_pipeline( + device_id, + adapter, + desc, + implicit_context, + hub, + &mut token, + ) { + Ok(pair) => pair, + Err(e) => break e, + }; + let ref_count = pipeline.life_guard.add_ref(); + + let id = fid.assign(pipeline, &mut token); + log::info!("Created render pipeline {:?} with {:?}", id, desc); + + device + .trackers + .lock() + .render_pipelines + .insert_single(id, ref_count); + + return (id.0, None); + }; + + let id = fid.assign_error(desc.label.borrow_or_default(), &mut token); + (id, Some(error)) + } + + /// Get an ID of one of the bind group layouts. 
The ID adds a refcount, + /// which needs to be released by calling `bind_group_layout_drop`. + pub fn render_pipeline_get_bind_group_layout<A: HalApi>( + &self, + pipeline_id: id::RenderPipelineId, + index: u32, + id_in: Input<G, id::BindGroupLayoutId>, + ) -> ( + id::BindGroupLayoutId, + Option<binding_model::GetBindGroupLayoutError>, + ) { + let hub = A::hub(self); + let mut token = Token::root(); + let (pipeline_layout_guard, mut token) = hub.pipeline_layouts.read(&mut token); + + let error = loop { + let (bgl_guard, mut token) = hub.bind_group_layouts.read(&mut token); + let (_, mut token) = hub.bind_groups.read(&mut token); + let (pipeline_guard, _) = hub.render_pipelines.read(&mut token); + + let pipeline = match pipeline_guard.get(pipeline_id) { + Ok(pipeline) => pipeline, + Err(_) => break binding_model::GetBindGroupLayoutError::InvalidPipeline, + }; + let id = match pipeline_layout_guard[pipeline.layout_id.value] + .bind_group_layout_ids + .get(index as usize) + { + Some(id) => id, + None => break binding_model::GetBindGroupLayoutError::InvalidGroupIndex(index), + }; + + bgl_guard[*id].multi_ref_count.inc(); + return (id.0, None); + }; + + let id = hub + .bind_group_layouts + .prepare(id_in) + .assign_error("<derived>", &mut token); + (id, Some(error)) + } + + pub fn render_pipeline_label<A: HalApi>(&self, id: id::RenderPipelineId) -> String { + A::hub(self).render_pipelines.label_for_resource(id) + } + + pub fn render_pipeline_drop<A: HalApi>(&self, render_pipeline_id: id::RenderPipelineId) { + profiling::scope!("RenderPipeline::drop"); + log::debug!("render pipeline {:?} is dropped", render_pipeline_id); + let hub = A::hub(self); + let mut token = Token::root(); + let (device_guard, mut token) = hub.devices.read(&mut token); + + let (device_id, layout_id) = { + let (mut pipeline_guard, _) = hub.render_pipelines.write(&mut token); + match pipeline_guard.get_mut(render_pipeline_id) { + Ok(pipeline) => { + pipeline.life_guard.ref_count.take(); + 
(pipeline.device_id.value, pipeline.layout_id.clone()) + } + Err(InvalidId) => { + hub.render_pipelines + .unregister_locked(render_pipeline_id, &mut *pipeline_guard); + return; + } + } + }; + + let mut life_lock = device_guard[device_id].lock_life(&mut token); + life_lock + .suspected_resources + .render_pipelines + .push(id::Valid(render_pipeline_id)); + life_lock + .suspected_resources + .pipeline_layouts + .push(layout_id); + } + + pub fn device_create_compute_pipeline<A: HalApi>( + &self, + device_id: id::DeviceId, + desc: &pipeline::ComputePipelineDescriptor, + id_in: Input<G, id::ComputePipelineId>, + implicit_pipeline_ids: Option<ImplicitPipelineIds<G>>, + ) -> ( + id::ComputePipelineId, + Option<pipeline::CreateComputePipelineError>, + ) { + profiling::scope!("Device::create_compute_pipeline"); + + let hub = A::hub(self); + let mut token = Token::root(); + + let fid = hub.compute_pipelines.prepare(id_in); + let implicit_context = implicit_pipeline_ids.map(|ipi| ipi.prepare(hub)); + + let (device_guard, mut token) = hub.devices.read(&mut token); + let error = loop { + let device = match device_guard.get(device_id) { + Ok(device) => device, + Err(_) => break DeviceError::Invalid.into(), + }; + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + trace.lock().add(trace::Action::CreateComputePipeline { + id: fid.id(), + desc: desc.clone(), + implicit_context: implicit_context.clone(), + }); + } + + let pipeline = match device.create_compute_pipeline( + device_id, + desc, + implicit_context, + hub, + &mut token, + ) { + Ok(pair) => pair, + Err(e) => break e, + }; + let ref_count = pipeline.life_guard.add_ref(); + + let id = fid.assign(pipeline, &mut token); + log::info!("Created compute pipeline {:?} with {:?}", id, desc); + + device + .trackers + .lock() + .compute_pipelines + .insert_single(id, ref_count); + return (id.0, None); + }; + + let id = fid.assign_error(desc.label.borrow_or_default(), &mut token); + (id, Some(error)) + } + + /// Get 
an ID of one of the bind group layouts. The ID adds a refcount, + /// which needs to be released by calling `bind_group_layout_drop`. + pub fn compute_pipeline_get_bind_group_layout<A: HalApi>( + &self, + pipeline_id: id::ComputePipelineId, + index: u32, + id_in: Input<G, id::BindGroupLayoutId>, + ) -> ( + id::BindGroupLayoutId, + Option<binding_model::GetBindGroupLayoutError>, + ) { + let hub = A::hub(self); + let mut token = Token::root(); + let (pipeline_layout_guard, mut token) = hub.pipeline_layouts.read(&mut token); + + let error = loop { + let (bgl_guard, mut token) = hub.bind_group_layouts.read(&mut token); + let (_, mut token) = hub.bind_groups.read(&mut token); + let (pipeline_guard, _) = hub.compute_pipelines.read(&mut token); + + let pipeline = match pipeline_guard.get(pipeline_id) { + Ok(pipeline) => pipeline, + Err(_) => break binding_model::GetBindGroupLayoutError::InvalidPipeline, + }; + let id = match pipeline_layout_guard[pipeline.layout_id.value] + .bind_group_layout_ids + .get(index as usize) + { + Some(id) => id, + None => break binding_model::GetBindGroupLayoutError::InvalidGroupIndex(index), + }; + + bgl_guard[*id].multi_ref_count.inc(); + return (id.0, None); + }; + + let id = hub + .bind_group_layouts + .prepare(id_in) + .assign_error("<derived>", &mut token); + (id, Some(error)) + } + + pub fn compute_pipeline_label<A: HalApi>(&self, id: id::ComputePipelineId) -> String { + A::hub(self).compute_pipelines.label_for_resource(id) + } + + pub fn compute_pipeline_drop<A: HalApi>(&self, compute_pipeline_id: id::ComputePipelineId) { + profiling::scope!("ComputePipeline::drop"); + log::debug!("compute pipeline {:?} is dropped", compute_pipeline_id); + let hub = A::hub(self); + let mut token = Token::root(); + let (device_guard, mut token) = hub.devices.read(&mut token); + + let (device_id, layout_id) = { + let (mut pipeline_guard, _) = hub.compute_pipelines.write(&mut token); + match pipeline_guard.get_mut(compute_pipeline_id) { + Ok(pipeline) => 
{ + pipeline.life_guard.ref_count.take(); + (pipeline.device_id.value, pipeline.layout_id.clone()) + } + Err(InvalidId) => { + hub.compute_pipelines + .unregister_locked(compute_pipeline_id, &mut *pipeline_guard); + return; + } + } + }; + + let mut life_lock = device_guard[device_id].lock_life(&mut token); + life_lock + .suspected_resources + .compute_pipelines + .push(id::Valid(compute_pipeline_id)); + life_lock + .suspected_resources + .pipeline_layouts + .push(layout_id); + } + + pub fn surface_configure<A: HalApi>( + &self, + surface_id: id::SurfaceId, + device_id: id::DeviceId, + config: &wgt::SurfaceConfiguration, + ) -> Option<present::ConfigureSurfaceError> { + use hal::{Adapter as _, Surface as _}; + use present::ConfigureSurfaceError as E; + profiling::scope!("surface_configure"); + + fn validate_surface_configuration( + config: &mut hal::SurfaceConfiguration, + caps: &hal::SurfaceCapabilities, + ) -> Result<(), E> { + let width = config.extent.width; + let height = config.extent.height; + if width < caps.extents.start().width + || width > caps.extents.end().width + || height < caps.extents.start().height + || height > caps.extents.end().height + { + log::warn!( + "Requested size {}x{} is outside of the supported range: {:?}", + width, + height, + caps.extents + ); + } + if !caps.present_modes.contains(&config.present_mode) { + let new_mode = 'b: loop { + // Automatic present mode checks. + // + // The "Automatic" modes are never supported by the backends. + let fallbacks = match config.present_mode { + wgt::PresentMode::AutoVsync => { + &[wgt::PresentMode::FifoRelaxed, wgt::PresentMode::Fifo][..] 
+ } + // Always end in FIFO to make sure it's always supported + wgt::PresentMode::AutoNoVsync => &[ + wgt::PresentMode::Immediate, + wgt::PresentMode::Mailbox, + wgt::PresentMode::Fifo, + ][..], + _ => { + return Err(E::UnsupportedPresentMode { + requested: config.present_mode, + available: caps.present_modes.clone(), + }); + } + }; + + for &fallback in fallbacks { + if caps.present_modes.contains(&fallback) { + break 'b fallback; + } + } + + unreachable!("Fallback system failed to choose present mode. This is a bug. Mode: {:?}, Options: {:?}", config.present_mode, &caps.present_modes); + }; + + log::info!( + "Automatically choosing presentation mode by rule {:?}. Chose {new_mode:?}", + config.present_mode + ); + config.present_mode = new_mode; + } + if !caps.formats.contains(&config.format) { + return Err(E::UnsupportedFormat { + requested: config.format, + available: caps.formats.clone(), + }); + } + if !caps + .composite_alpha_modes + .contains(&config.composite_alpha_mode) + { + let new_alpha_mode = 'alpha: loop { + // Automatic alpha mode checks. + let fallbacks = match config.composite_alpha_mode { + wgt::CompositeAlphaMode::Auto => &[ + wgt::CompositeAlphaMode::Opaque, + wgt::CompositeAlphaMode::Inherit, + ][..], + _ => { + return Err(E::UnsupportedAlphaMode { + requested: config.composite_alpha_mode, + available: caps.composite_alpha_modes.clone(), + }); + } + }; + + for &fallback in fallbacks { + if caps.composite_alpha_modes.contains(&fallback) { + break 'alpha fallback; + } + } + + unreachable!( + "Fallback system failed to choose alpha mode. This is a bug. \ + AlphaMode: {:?}, Options: {:?}", + config.composite_alpha_mode, &caps.composite_alpha_modes + ); + }; + + log::info!( + "Automatically choosing alpha mode by rule {:?}. 
Chose {new_alpha_mode:?}", + config.composite_alpha_mode + ); + config.composite_alpha_mode = new_alpha_mode; + } + if !caps.usage.contains(config.usage) { + return Err(E::UnsupportedUsage); + } + if width == 0 || height == 0 { + return Err(E::ZeroArea); + } + Ok(()) + } + + log::info!("configuring surface with {:?}", config); + let hub = A::hub(self); + let mut token = Token::root(); + + let (mut surface_guard, mut token) = self.surfaces.write(&mut token); + let (adapter_guard, mut token) = hub.adapters.read(&mut token); + let (device_guard, _token) = hub.devices.read(&mut token); + + let error = loop { + let device = match device_guard.get(device_id) { + Ok(device) => device, + Err(_) => break DeviceError::Invalid.into(), + }; + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + trace + .lock() + .add(trace::Action::ConfigureSurface(surface_id, config.clone())); + } + + let surface = match surface_guard.get_mut(surface_id) { + Ok(surface) => surface, + Err(_) => break E::InvalidSurface, + }; + + let caps = unsafe { + let suf = A::get_surface(surface); + let adapter = &adapter_guard[device.adapter_id.value]; + match adapter.raw.adapter.surface_capabilities(&suf.unwrap().raw) { + Some(caps) => caps, + None => break E::UnsupportedQueueFamily, + } + }; + + let num_frames = present::DESIRED_NUM_FRAMES + .clamp(*caps.swap_chain_sizes.start(), *caps.swap_chain_sizes.end()); + let mut hal_config = hal::SurfaceConfiguration { + swap_chain_size: num_frames, + present_mode: config.present_mode, + composite_alpha_mode: config.alpha_mode, + format: config.format, + extent: wgt::Extent3d { + width: config.width, + height: config.height, + depth_or_array_layers: 1, + }, + usage: conv::map_texture_usage(config.usage, hal::FormatAspects::COLOR), + }; + + if let Err(error) = validate_surface_configuration(&mut hal_config, &caps) { + break error; + } + + match unsafe { + A::get_surface_mut(surface) + .unwrap() + .raw + .configure(&device.raw, &hal_config) + } { + 
Ok(()) => (), + Err(error) => { + break match error { + hal::SurfaceError::Outdated | hal::SurfaceError::Lost => E::InvalidSurface, + hal::SurfaceError::Device(error) => E::Device(error.into()), + hal::SurfaceError::Other(message) => { + log::error!("surface configuration failed: {}", message); + E::InvalidSurface + } + } + } + } + + if let Some(present) = surface.presentation.take() { + if present.acquired_texture.is_some() { + break E::PreviousOutputExists; + } + } + + surface.presentation = Some(present::Presentation { + device_id: Stored { + value: id::Valid(device_id), + ref_count: device.life_guard.add_ref(), + }, + config: config.clone(), + num_frames, + acquired_texture: None, + }); + + return None; + }; + + Some(error) + } + + #[cfg(feature = "replay")] + /// Only triange suspected resource IDs. This helps us to avoid ID collisions + /// upon creating new resources when re-playing a trace. + pub fn device_maintain_ids<A: HalApi>( + &self, + device_id: id::DeviceId, + ) -> Result<(), InvalidDevice> { + let hub = A::hub(self); + let mut token = Token::root(); + let (device_guard, mut token) = hub.devices.read(&mut token); + let device = device_guard.get(device_id).map_err(|_| InvalidDevice)?; + device.lock_life(&mut token).triage_suspected( + hub, + &device.trackers, + #[cfg(feature = "trace")] + None, + &mut token, + ); + Ok(()) + } + + /// Check `device_id` for freeable resources and completed buffer mappings. + /// + /// Return `queue_empty` indicating whether there are more queue submissions still in flight. 
+    pub fn device_poll<A: HalApi>(
+        &self,
+        device_id: id::DeviceId,
+        maintain: wgt::Maintain<queue::WrappedSubmissionIndex>,
+    ) -> Result<bool, WaitIdleError> {
+        // A submission index is only accepted when its `queue_id` equals
+        // `device_id`; a mismatch is rejected before the device registry is read.
+        if let wgt::Maintain::WaitForSubmissionIndex(ref wrapped) = maintain {
+            if wrapped.queue_id != device_id {
+                return Err(WaitIdleError::WrongSubmissionIndex(
+                    wrapped.queue_id,
+                    device_id,
+                ));
+            }
+        }
+
+        // The registry guard is confined to this block so that it is gone
+        // again before any user callback is fired below.
+        let (user_closures, queue_empty) = {
+            let hub = A::hub(self);
+            let mut root = Token::root();
+            let (devices, mut token) = hub.devices.read(&mut root);
+            let device = devices.get(device_id).map_err(|_| DeviceError::Invalid)?;
+            device.maintain(hub, maintain, &mut token)?
+        };
+
+        user_closures.fire();
+
+        Ok(queue_empty)
+    }
+
+    /// Maintain every device registered for the backend `A`.
+    ///
+    /// When `force_wait` is true each device is maintained with
+    /// `wgt::Maintain::Wait`, otherwise with `wgt::Maintain::Poll`. The
+    /// callbacks produced by each device are appended to `closures`; they are
+    /// not fired here.
+    ///
+    /// Returns the conjunction of the `queue_empty` flags reported by the
+    /// individual devices. An error from any device's `maintain` call is
+    /// returned immediately.
+    fn poll_devices<A: HalApi>(
+        &self,
+        force_wait: bool,
+        closures: &mut UserClosures,
+    ) -> Result<bool, WaitIdleError> {
+        profiling::scope!("poll_devices");
+
+        let hub = A::hub(self);
+        let mut idle_unreferenced = Vec::new();
+        let mut all_queue_empty = true;
+        {
+            let mut root = Token::root();
+            let (devices, mut token) = hub.devices.read(&mut root);
+
+            for (id, device) in devices.iter(A::VARIANT) {
+                let mode = match force_wait {
+                    true => wgt::Maintain::Wait,
+                    false => wgt::Maintain::Poll,
+                };
+                let (callbacks, queue_empty) = device.maintain(hub, mode, &mut token)?;
+                all_queue_empty &= queue_empty;
+
+                // A device whose queue is empty and whose own `RefCount` clone
+                // is the last one left can be freed once the guard is released.
+                if queue_empty && device.ref_count.load() == 1 {
+                    idle_unreferenced.push(id);
+                }
+                closures.extend(callbacks);
+            }
+        }
+
+        // The registry guard above has been dropped; now the collected
+        // devices can be removed.
+        for id in idle_unreferenced {
+            self.exit_device::<A>(id);
+        }
+
+        Ok(all_queue_empty)
+    }
+
+    /// Poll all devices on all backends.
+ /// + /// This is the implementation of `wgpu::Instance::poll_all`. + /// + /// Return `all_queue_empty` indicating whether there are more queue + /// submissions still in flight. + pub fn poll_all_devices(&self, force_wait: bool) -> Result<bool, WaitIdleError> { + let mut closures = UserClosures::default(); + let mut all_queue_empty = true; + + #[cfg(feature = "vulkan")] + { + all_queue_empty = self.poll_devices::<hal::api::Vulkan>(force_wait, &mut closures)? + && all_queue_empty; + } + #[cfg(feature = "metal")] + { + all_queue_empty = + self.poll_devices::<hal::api::Metal>(force_wait, &mut closures)? && all_queue_empty; + } + #[cfg(feature = "dx12")] + { + all_queue_empty = + self.poll_devices::<hal::api::Dx12>(force_wait, &mut closures)? && all_queue_empty; + } + #[cfg(feature = "dx11")] + { + all_queue_empty = + self.poll_devices::<hal::api::Dx11>(force_wait, &mut closures)? && all_queue_empty; + } + #[cfg(feature = "gles")] + { + all_queue_empty = + self.poll_devices::<hal::api::Gles>(force_wait, &mut closures)? 
&& all_queue_empty; + } + + closures.fire(); + + Ok(all_queue_empty) + } + + pub fn device_label<A: HalApi>(&self, id: id::DeviceId) -> String { + A::hub(self).devices.label_for_resource(id) + } + + pub fn device_start_capture<A: HalApi>(&self, id: id::DeviceId) { + let hub = A::hub(self); + let mut token = Token::root(); + let (device_guard, _) = hub.devices.read(&mut token); + if let Ok(device) = device_guard.get(id) { + unsafe { device.raw.start_capture() }; + } + } + + pub fn device_stop_capture<A: HalApi>(&self, id: id::DeviceId) { + let hub = A::hub(self); + let mut token = Token::root(); + let (device_guard, _) = hub.devices.read(&mut token); + if let Ok(device) = device_guard.get(id) { + unsafe { device.raw.stop_capture() }; + } + } + + pub fn device_drop<A: HalApi>(&self, device_id: id::DeviceId) { + profiling::scope!("Device::drop"); + log::debug!("device {:?} is dropped", device_id); + + let hub = A::hub(self); + let mut token = Token::root(); + + // For now, just drop the `RefCount` in `device.life_guard`, which + // stands for the user's reference to the device. We'll take care of + // cleaning up the device when we're polled, once its queue submissions + // have completed and it is no longer needed by other resources. + let (mut device_guard, _) = hub.devices.write(&mut token); + if let Ok(device) = device_guard.get_mut(device_id) { + device.life_guard.ref_count.take().unwrap(); + } + } + + /// Exit the unreferenced, inactive device `device_id`. + fn exit_device<A: HalApi>(&self, device_id: id::DeviceId) { + let hub = A::hub(self); + let mut token = Token::root(); + let mut free_adapter_id = None; + { + let (device, mut _token) = hub.devices.unregister(device_id, &mut token); + if let Some(mut device) = device { + // The things `Device::prepare_to_die` takes care are mostly + // unnecessary here. We know our queue is empty, so we don't + // need to wait for submissions or triage them. 
We know we were + // just polled, so `life_tracker.free_resources` is empty. + debug_assert!(device.lock_life(&mut _token).queue_empty()); + device.pending_writes.deactivate(); + + // Adapter is only referenced by the device and itself. + // This isn't a robust way to destroy them, we should find a better one. + if device.adapter_id.ref_count.load() == 1 { + free_adapter_id = Some(device.adapter_id.value.0); + } + + device.dispose(); + } + } + + // Free the adapter now that we've dropped the `Device` token. + if let Some(free_adapter_id) = free_adapter_id { + let _ = hub.adapters.unregister(free_adapter_id, &mut token); + } + } + + pub fn buffer_map_async<A: HalApi>( + &self, + buffer_id: id::BufferId, + range: Range<BufferAddress>, + op: BufferMapOperation, + ) -> BufferAccessResult { + // User callbacks must not be called while holding buffer_map_async_inner's locks, so we + // defer the error callback if it needs to be called immediately (typically when running + // into errors). + if let Err((op, err)) = self.buffer_map_async_inner::<A>(buffer_id, range, op) { + op.callback.call(Err(err.clone())); + + return Err(err); + } + + Ok(()) + } + + // Returns the mapping callback in case of error so that the callback can be fired outside + // of the locks that are held in this function. 
+ fn buffer_map_async_inner<A: HalApi>( + &self, + buffer_id: id::BufferId, + range: Range<BufferAddress>, + op: BufferMapOperation, + ) -> Result<(), (BufferMapOperation, BufferAccessError)> { + profiling::scope!("Buffer::map_async"); + + let hub = A::hub(self); + let mut token = Token::root(); + let (device_guard, mut token) = hub.devices.read(&mut token); + let (pub_usage, internal_use) = match op.host { + HostMap::Read => (wgt::BufferUsages::MAP_READ, hal::BufferUses::MAP_READ), + HostMap::Write => (wgt::BufferUsages::MAP_WRITE, hal::BufferUses::MAP_WRITE), + }; + + if range.start % wgt::MAP_ALIGNMENT != 0 || range.end % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err((op, BufferAccessError::UnalignedRange)); + } + + let (device_id, ref_count) = { + let (mut buffer_guard, _) = hub.buffers.write(&mut token); + let buffer = buffer_guard + .get_mut(buffer_id) + .map_err(|_| BufferAccessError::Invalid); + + let buffer = match buffer { + Ok(b) => b, + Err(e) => { + return Err((op, e)); + } + }; + + if let Err(e) = check_buffer_usage(buffer.usage, pub_usage) { + return Err((op, e.into())); + } + + if range.start > range.end { + return Err(( + op, + BufferAccessError::NegativeRange { + start: range.start, + end: range.end, + }, + )); + } + if range.end > buffer.size { + return Err(( + op, + BufferAccessError::OutOfBoundsOverrun { + index: range.end, + max: buffer.size, + }, + )); + } + + buffer.map_state = match buffer.map_state { + resource::BufferMapState::Init { .. } | resource::BufferMapState::Active { .. 
} => { + return Err((op, BufferAccessError::AlreadyMapped)); + } + resource::BufferMapState::Waiting(_) => { + return Err((op, BufferAccessError::MapAlreadyPending)); + } + resource::BufferMapState::Idle => { + resource::BufferMapState::Waiting(resource::BufferPendingMapping { + range, + op, + _parent_ref_count: buffer.life_guard.add_ref(), + }) + } + }; + log::debug!("Buffer {:?} map state -> Waiting", buffer_id); + + let device = &device_guard[buffer.device_id.value]; + + let ret = (buffer.device_id.value, buffer.life_guard.add_ref()); + + let mut trackers = device.trackers.lock(); + trackers + .buffers + .set_single(&*buffer_guard, buffer_id, internal_use); + trackers.buffers.drain(); + + ret + }; + + let device = &device_guard[device_id]; + + device + .lock_life(&mut token) + .map(id::Valid(buffer_id), ref_count); + + Ok(()) + } + + pub fn buffer_get_mapped_range<A: HalApi>( + &self, + buffer_id: id::BufferId, + offset: BufferAddress, + size: Option<BufferAddress>, + ) -> Result<(*mut u8, u64), BufferAccessError> { + profiling::scope!("Buffer::get_mapped_range"); + + let hub = A::hub(self); + let mut token = Token::root(); + let (buffer_guard, _) = hub.buffers.read(&mut token); + let buffer = buffer_guard + .get(buffer_id) + .map_err(|_| BufferAccessError::Invalid)?; + + let range_size = if let Some(size) = size { + size + } else if offset > buffer.size { + 0 + } else { + buffer.size - offset + }; + + if offset % wgt::MAP_ALIGNMENT != 0 { + return Err(BufferAccessError::UnalignedOffset { offset }); + } + if range_size % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(BufferAccessError::UnalignedRangeSize { range_size }); + } + + match buffer.map_state { + resource::BufferMapState::Init { ptr, .. 
} => { + // offset (u64) can not be < 0, so no need to validate the lower bound + if offset + range_size > buffer.size { + return Err(BufferAccessError::OutOfBoundsOverrun { + index: offset + range_size - 1, + max: buffer.size, + }); + } + unsafe { Ok((ptr.as_ptr().offset(offset as isize), range_size)) } + } + resource::BufferMapState::Active { ptr, ref range, .. } => { + if offset < range.start { + return Err(BufferAccessError::OutOfBoundsUnderrun { + index: offset, + min: range.start, + }); + } + if offset + range_size > range.end { + return Err(BufferAccessError::OutOfBoundsOverrun { + index: offset + range_size - 1, + max: range.end, + }); + } + // ptr points to the beginning of the range we mapped in map_async + // rather thant the beginning of the buffer. + let relative_offset = (offset - range.start) as isize; + unsafe { Ok((ptr.as_ptr().offset(relative_offset), range_size)) } + } + resource::BufferMapState::Idle | resource::BufferMapState::Waiting(_) => { + Err(BufferAccessError::NotMapped) + } + } + } + + fn buffer_unmap_inner<A: HalApi>( + &self, + buffer_id: id::BufferId, + buffer: &mut resource::Buffer<A>, + device: &mut Device<A>, + ) -> Result<Option<BufferMapPendingClosure>, BufferAccessError> { + log::debug!("Buffer {:?} map state -> Idle", buffer_id); + match mem::replace(&mut buffer.map_state, resource::BufferMapState::Idle) { + resource::BufferMapState::Init { + ptr, + stage_buffer, + needs_flush, + } => { + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + let mut trace = trace.lock(); + let data = trace.make_binary("bin", unsafe { + std::slice::from_raw_parts(ptr.as_ptr(), buffer.size as usize) + }); + trace.add(trace::Action::WriteBuffer { + id: buffer_id, + data, + range: 0..buffer.size, + queued: true, + }); + } + let _ = ptr; + if needs_flush { + unsafe { + device + .raw + .flush_mapped_ranges(&stage_buffer, iter::once(0..buffer.size)); + } + } + + let raw_buf = buffer.raw.as_ref().ok_or(BufferAccessError::Destroyed)?; + 
+ buffer.life_guard.use_at(device.active_submission_index + 1); + let region = wgt::BufferSize::new(buffer.size).map(|size| hal::BufferCopy { + src_offset: 0, + dst_offset: 0, + size, + }); + let transition_src = hal::BufferBarrier { + buffer: &stage_buffer, + usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, + }; + let transition_dst = hal::BufferBarrier { + buffer: raw_buf, + usage: hal::BufferUses::empty()..hal::BufferUses::COPY_DST, + }; + let encoder = device.pending_writes.activate(); + unsafe { + encoder.transition_buffers( + iter::once(transition_src).chain(iter::once(transition_dst)), + ); + if buffer.size > 0 { + encoder.copy_buffer_to_buffer(&stage_buffer, raw_buf, region.into_iter()); + } + } + device + .pending_writes + .consume_temp(queue::TempResource::Buffer(stage_buffer)); + device.pending_writes.dst_buffers.insert(buffer_id); + } + resource::BufferMapState::Idle => { + return Err(BufferAccessError::NotMapped); + } + resource::BufferMapState::Waiting(pending) => { + return Ok(Some((pending.op, Err(BufferAccessError::MapAborted)))); + } + resource::BufferMapState::Active { ptr, range, host } => { + if host == HostMap::Write { + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + let mut trace = trace.lock(); + let size = range.end - range.start; + let data = trace.make_binary("bin", unsafe { + std::slice::from_raw_parts(ptr.as_ptr(), size as usize) + }); + trace.add(trace::Action::WriteBuffer { + id: buffer_id, + data, + range: range.clone(), + queued: false, + }); + } + let _ = (ptr, range); + } + unsafe { + device + .raw + .unmap_buffer(buffer.raw.as_ref().unwrap()) + .map_err(DeviceError::from)? + }; + } + } + Ok(None) + } + + pub fn buffer_unmap<A: HalApi>(&self, buffer_id: id::BufferId) -> BufferAccessResult { + profiling::scope!("unmap", "Buffer"); + + let closure; + { + // Restrict the locks to this scope. 
+ let hub = A::hub(self); + let mut token = Token::root(); + + let (mut device_guard, mut token) = hub.devices.write(&mut token); + let (mut buffer_guard, _) = hub.buffers.write(&mut token); + let buffer = buffer_guard + .get_mut(buffer_id) + .map_err(|_| BufferAccessError::Invalid)?; + let device = &mut device_guard[buffer.device_id.value]; + + closure = self.buffer_unmap_inner(buffer_id, buffer, device) + } + + // Note: outside the scope where locks are held when calling the callback + if let Some((operation, status)) = closure? { + operation.callback.call(status); + } + Ok(()) + } +} diff --git a/third_party/rust/wgpu-core/src/device/queue.rs b/third_party/rust/wgpu-core/src/device/queue.rs new file mode 100644 index 0000000000..ba0fb053da --- /dev/null +++ b/third_party/rust/wgpu-core/src/device/queue.rs @@ -0,0 +1,1218 @@ +#[cfg(feature = "trace")] +use crate::device::trace::Action; +use crate::{ + command::{ + extract_texture_selector, validate_linear_texture_data, validate_texture_copy_range, + ClearError, CommandBuffer, CopySide, ImageCopyTexture, TransferError, + }, + conv, + device::{DeviceError, WaitIdleError}, + get_lowest_common_denom, + hub::{Global, GlobalIdentityHandlerFactory, HalApi, Input, Token}, + id, + init_tracker::{has_copy_partial_init_tracker_coverage, TextureInitRange}, + resource::{BufferAccessError, BufferMapState, StagingBuffer, TextureInner}, + track, FastHashSet, SubmissionIndex, +}; + +use hal::{CommandEncoder as _, Device as _, Queue as _}; +use parking_lot::Mutex; +use smallvec::SmallVec; +use std::{iter, mem, num::NonZeroU32, ptr}; +use thiserror::Error; + +/// Number of command buffers that we generate from the same pool +/// for the write_xxx commands, before the pool is recycled. +/// +/// If we don't stop at some point, the pool will grow forever, +/// without a concrete moment of when it can be cleared. 
+const WRITE_COMMAND_BUFFERS_PER_POOL: usize = 64; + +#[repr(C)] +pub struct SubmittedWorkDoneClosureC { + pub callback: unsafe extern "C" fn(user_data: *mut u8), + pub user_data: *mut u8, +} + +unsafe impl Send for SubmittedWorkDoneClosureC {} + +pub struct SubmittedWorkDoneClosure { + // We wrap this so creating the enum in the C variant can be unsafe, + // allowing our call function to be safe. + inner: SubmittedWorkDoneClosureInner, +} + +enum SubmittedWorkDoneClosureInner { + Rust { + callback: Box<dyn FnOnce() + Send + 'static>, + }, + C { + inner: SubmittedWorkDoneClosureC, + }, +} + +impl SubmittedWorkDoneClosure { + pub fn from_rust(callback: Box<dyn FnOnce() + Send + 'static>) -> Self { + Self { + inner: SubmittedWorkDoneClosureInner::Rust { callback }, + } + } + + /// # Safety + /// + /// - The callback pointer must be valid to call with the provided `user_data` + /// pointer. + /// + /// - Both pointers must point to `'static` data, as the callback may happen at + /// an unspecified time. + pub unsafe fn from_c(inner: SubmittedWorkDoneClosureC) -> Self { + Self { + inner: SubmittedWorkDoneClosureInner::C { inner }, + } + } + + pub(crate) fn call(self) { + match self.inner { + SubmittedWorkDoneClosureInner::Rust { callback } => callback(), + // SAFETY: the contract of the call to from_c says that this unsafe is sound. + SubmittedWorkDoneClosureInner::C { inner } => unsafe { + (inner.callback)(inner.user_data) + }, + } + } +} + +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct WrappedSubmissionIndex { + pub queue_id: id::QueueId, + pub index: SubmissionIndex, +} + +/// A texture or buffer to be freed soon. 
+///
+/// This is only a tagged raw texture or buffer, generally on its way into
+/// some other, more specific container such as:
+///
+/// - `PendingWrites::temp_resources`: resources used by queue writes and
+///   unmaps, waiting to be folded in with the next queue submission
+///
+/// - `ActiveSubmission::last_resources`: temporary resources used by a queue
+///   submission, to be freed when it completes
+///
+/// - `LifetimeTracker::free_resources`: resources to be freed in the next
+///   `maintain` call, no longer used anywhere
+#[derive(Debug)]
+pub enum TempResource<A: hal::Api> {
+    Buffer(A::Buffer),
+    Texture(A::Texture, SmallVec<[A::TextureView; 1]>),
+}
+
+/// A queue execution for a particular command encoder.
+pub(super) struct EncoderInFlight<A: hal::Api> {
+    raw: A::CommandEncoder,
+    cmd_buffers: Vec<A::CommandBuffer>,
+}
+
+impl<A: hal::Api> EncoderInFlight<A> {
+    /// Reset the encoder with all of its command buffers and return it.
+    ///
+    /// # Safety
+    ///
+    /// NOTE(review): the preconditions are those of the hal `reset_all` call
+    /// made here; presumably the command buffers must no longer be in use -
+    /// confirm against the wgpu-hal documentation.
+    pub(super) unsafe fn land(self) -> A::CommandEncoder {
+        let Self {
+            mut raw,
+            cmd_buffers,
+        } = self;
+        unsafe { raw.reset_all(cmd_buffers.into_iter()) };
+        raw
+    }
+}
+
+/// A private command encoder for writes made directly on the device
+/// or queue.
+///
+/// Operations like `buffer_unmap`, `queue_write_buffer`, and
+/// `queue_write_texture` need to copy data to the GPU. At the hal
+/// level, this must be done by encoding and submitting commands, but
+/// these operations are not associated with any specific wgpu command
+/// buffer.
+///
+/// Instead, `Device::pending_writes` owns one of these values, which
+/// has its own hal command encoder and resource lists. The commands
+/// accumulated here are automatically submitted to the queue the next
+/// time the user submits a wgpu command buffer, ahead of the user's
+/// commands.
+///
+/// All uses of [`StagingBuffer`]s end up here.
+#[derive(Debug)] +pub(crate) struct PendingWrites<A: hal::Api> { + pub command_encoder: A::CommandEncoder, + pub is_active: bool, + pub temp_resources: Vec<TempResource<A>>, + pub dst_buffers: FastHashSet<id::BufferId>, + pub dst_textures: FastHashSet<id::TextureId>, + pub executing_command_buffers: Vec<A::CommandBuffer>, +} + +impl<A: hal::Api> PendingWrites<A> { + pub fn new(command_encoder: A::CommandEncoder) -> Self { + Self { + command_encoder, + is_active: false, + temp_resources: Vec::new(), + dst_buffers: FastHashSet::default(), + dst_textures: FastHashSet::default(), + executing_command_buffers: Vec::new(), + } + } + + pub fn dispose(mut self, device: &A::Device) { + unsafe { + if self.is_active { + self.command_encoder.discard_encoding(); + } + self.command_encoder + .reset_all(self.executing_command_buffers.into_iter()); + device.destroy_command_encoder(self.command_encoder); + } + + for resource in self.temp_resources { + match resource { + TempResource::Buffer(buffer) => unsafe { + device.destroy_buffer(buffer); + }, + TempResource::Texture(texture, views) => unsafe { + for view in views.into_iter() { + device.destroy_texture_view(view); + } + device.destroy_texture(texture); + }, + } + } + } + + pub fn consume_temp(&mut self, resource: TempResource<A>) { + self.temp_resources.push(resource); + } + + fn consume(&mut self, buffer: StagingBuffer<A>) { + self.temp_resources.push(TempResource::Buffer(buffer.raw)); + } + + #[must_use] + fn pre_submit(&mut self) -> Option<&A::CommandBuffer> { + self.dst_buffers.clear(); + self.dst_textures.clear(); + if self.is_active { + let cmd_buf = unsafe { self.command_encoder.end_encoding().unwrap() }; + self.is_active = false; + self.executing_command_buffers.push(cmd_buf); + self.executing_command_buffers.last() + } else { + None + } + } + + #[must_use] + fn post_submit( + &mut self, + command_allocator: &Mutex<super::CommandAllocator<A>>, + device: &A::Device, + queue: &A::Queue, + ) -> Option<EncoderInFlight<A>> { 
+ if self.executing_command_buffers.len() >= WRITE_COMMAND_BUFFERS_PER_POOL { + let new_encoder = command_allocator + .lock() + .acquire_encoder(device, queue) + .unwrap(); + Some(EncoderInFlight { + raw: mem::replace(&mut self.command_encoder, new_encoder), + cmd_buffers: mem::take(&mut self.executing_command_buffers), + }) + } else { + None + } + } + + pub fn activate(&mut self) -> &mut A::CommandEncoder { + if !self.is_active { + unsafe { + self.command_encoder + .begin_encoding(Some("(wgpu internal) PendingWrites")) + .unwrap(); + } + self.is_active = true; + } + &mut self.command_encoder + } + + pub fn deactivate(&mut self) { + if self.is_active { + unsafe { + self.command_encoder.discard_encoding(); + } + self.is_active = false; + } + } +} + +fn prepare_staging_buffer<A: HalApi>( + device: &mut A::Device, + size: wgt::BufferAddress, +) -> Result<(StagingBuffer<A>, *mut u8), DeviceError> { + profiling::scope!("prepare_staging_buffer"); + let stage_desc = hal::BufferDescriptor { + label: Some("(wgpu internal) Staging"), + size, + usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::COPY_SRC, + memory_flags: hal::MemoryFlags::TRANSIENT, + }; + + let buffer = unsafe { device.create_buffer(&stage_desc)? }; + let mapping = unsafe { device.map_buffer(&buffer, 0..size) }?; + + let staging_buffer = StagingBuffer { + raw: buffer, + size, + is_coherent: mapping.is_coherent, + }; + + Ok((staging_buffer, mapping.ptr.as_ptr())) +} + +impl<A: hal::Api> StagingBuffer<A> { + unsafe fn flush(&self, device: &A::Device) -> Result<(), DeviceError> { + if !self.is_coherent { + unsafe { device.flush_mapped_ranges(&self.raw, iter::once(0..self.size)) }; + } + unsafe { device.unmap_buffer(&self.raw)? 
}; + Ok(()) + } +} + +#[derive(Clone, Debug, Error)] +#[error("queue is invalid")] +pub struct InvalidQueue; + +#[derive(Clone, Debug, Error)] +pub enum QueueWriteError { + #[error(transparent)] + Queue(#[from] DeviceError), + #[error(transparent)] + Transfer(#[from] TransferError), + #[error(transparent)] + MemoryInitFailure(#[from] ClearError), +} + +#[derive(Clone, Debug, Error)] +pub enum QueueSubmitError { + #[error(transparent)] + Queue(#[from] DeviceError), + #[error("buffer {0:?} is destroyed")] + DestroyedBuffer(id::BufferId), + #[error("texture {0:?} is destroyed")] + DestroyedTexture(id::TextureId), + #[error(transparent)] + Unmap(#[from] BufferAccessError), + #[error("surface output was dropped before the command buffer got submitted")] + SurfaceOutputDropped, + #[error("surface was unconfigured before the command buffer got submitted")] + SurfaceUnconfigured, + #[error("GPU got stuck :(")] + StuckGpu, +} + +//TODO: move out common parts of write_xxx. + +impl<G: GlobalIdentityHandlerFactory> Global<G> { + pub fn queue_write_buffer<A: HalApi>( + &self, + queue_id: id::QueueId, + buffer_id: id::BufferId, + buffer_offset: wgt::BufferAddress, + data: &[u8], + ) -> Result<(), QueueWriteError> { + profiling::scope!("Queue::write_buffer"); + + let hub = A::hub(self); + let root_token = &mut Token::root(); + + let (mut device_guard, ref mut device_token) = hub.devices.write(root_token); + let device = device_guard + .get_mut(queue_id) + .map_err(|_| DeviceError::Invalid)?; + + let data_size = data.len() as wgt::BufferAddress; + + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + let mut trace = trace.lock(); + let data_path = trace.make_binary("bin", data); + trace.add(Action::WriteBuffer { + id: buffer_id, + data: data_path, + range: buffer_offset..buffer_offset + data_size, + queued: true, + }); + } + + if data_size == 0 { + log::trace!("Ignoring write_buffer of size 0"); + return Ok(()); + } + + // Platform validation requires that the 
staging buffer always be + // freed, even if an error occurs. All paths from here must call + // `device.pending_writes.consume`. + let (staging_buffer, staging_buffer_ptr) = + prepare_staging_buffer(&mut device.raw, data_size)?; + + if let Err(flush_error) = unsafe { + profiling::scope!("copy"); + ptr::copy_nonoverlapping(data.as_ptr(), staging_buffer_ptr, data.len()); + staging_buffer.flush(&device.raw) + } { + device.pending_writes.consume(staging_buffer); + return Err(flush_error.into()); + } + + let result = self.queue_write_staging_buffer_impl( + device, + device_token, + &staging_buffer, + buffer_id, + buffer_offset, + ); + + device.pending_writes.consume(staging_buffer); + result + } + + pub fn queue_create_staging_buffer<A: HalApi>( + &self, + queue_id: id::QueueId, + buffer_size: wgt::BufferSize, + id_in: Input<G, id::StagingBufferId>, + ) -> Result<(id::StagingBufferId, *mut u8), QueueWriteError> { + let hub = A::hub(self); + let root_token = &mut Token::root(); + + let (mut device_guard, ref mut device_token) = hub.devices.write(root_token); + let device = device_guard + .get_mut(queue_id) + .map_err(|_| DeviceError::Invalid)?; + + let (staging_buffer, staging_buffer_ptr) = + prepare_staging_buffer(&mut device.raw, buffer_size.get())?; + + let fid = hub.staging_buffers.prepare(id_in); + let id = fid.assign(staging_buffer, device_token); + + Ok((id.0, staging_buffer_ptr)) + } + + pub fn queue_write_staging_buffer<A: HalApi>( + &self, + queue_id: id::QueueId, + buffer_id: id::BufferId, + buffer_offset: wgt::BufferAddress, + staging_buffer_id: id::StagingBufferId, + ) -> Result<(), QueueWriteError> { + profiling::scope!("Queue::write_buffer_with"); + + let hub = A::hub(self); + let root_token = &mut Token::root(); + + let (mut device_guard, ref mut device_token) = hub.devices.write(root_token); + let device = device_guard + .get_mut(queue_id) + .map_err(|_| DeviceError::Invalid)?; + + let staging_buffer = hub + .staging_buffers + 
.unregister(staging_buffer_id, device_token) + .0 + .ok_or(TransferError::InvalidBuffer(buffer_id))?; + + // At this point, we have taken ownership of the staging_buffer from the + // user. Platform validation requires that the staging buffer always + // be freed, even if an error occurs. All paths from here must call + // `device.pending_writes.consume`. + if let Err(flush_error) = unsafe { staging_buffer.flush(&device.raw) } { + device.pending_writes.consume(staging_buffer); + return Err(flush_error.into()); + } + + let result = self.queue_write_staging_buffer_impl( + device, + device_token, + &staging_buffer, + buffer_id, + buffer_offset, + ); + + device.pending_writes.consume(staging_buffer); + result + } + + pub fn queue_validate_write_buffer<A: HalApi>( + &self, + _queue_id: id::QueueId, + buffer_id: id::BufferId, + buffer_offset: u64, + buffer_size: u64, + ) -> Result<(), QueueWriteError> { + let hub = A::hub(self); + let root_token = &mut Token::root(); + + let (_, ref mut device_token) = hub.devices.read(root_token); + + let buffer_guard = hub.buffers.read(device_token).0; + let buffer = buffer_guard + .get(buffer_id) + .map_err(|_| TransferError::InvalidBuffer(buffer_id))?; + + self.queue_validate_write_buffer_impl(buffer, buffer_id, buffer_offset, buffer_size)?; + + Ok(()) + } + + fn queue_validate_write_buffer_impl<A: HalApi>( + &self, + buffer: &super::resource::Buffer<A>, + buffer_id: id::BufferId, + buffer_offset: u64, + buffer_size: u64, + ) -> Result<(), TransferError> { + if !buffer.usage.contains(wgt::BufferUsages::COPY_DST) { + return Err(TransferError::MissingCopyDstUsageFlag( + Some(buffer_id), + None, + )); + } + if buffer_size % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(TransferError::UnalignedCopySize(buffer_size)); + } + if buffer_offset % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(TransferError::UnalignedBufferOffset(buffer_offset)); + } + if buffer_offset + buffer_size > buffer.size { + return Err(TransferError::BufferOverrun { 
+ start_offset: buffer_offset, + end_offset: buffer_offset + buffer_size, + buffer_size: buffer.size, + side: CopySide::Destination, + }); + } + + Ok(()) + } + + fn queue_write_staging_buffer_impl<A: HalApi>( + &self, + device: &mut super::Device<A>, + device_token: &mut Token<super::Device<A>>, + staging_buffer: &StagingBuffer<A>, + buffer_id: id::BufferId, + buffer_offset: u64, + ) -> Result<(), QueueWriteError> { + let hub = A::hub(self); + + let buffer_guard = hub.buffers.read(device_token).0; + + let mut trackers = device.trackers.lock(); + let (dst, transition) = trackers + .buffers + .set_single(&buffer_guard, buffer_id, hal::BufferUses::COPY_DST) + .ok_or(TransferError::InvalidBuffer(buffer_id))?; + let dst_raw = dst + .raw + .as_ref() + .ok_or(TransferError::InvalidBuffer(buffer_id))?; + + let src_buffer_size = staging_buffer.size; + self.queue_validate_write_buffer_impl(dst, buffer_id, buffer_offset, src_buffer_size)?; + + dst.life_guard.use_at(device.active_submission_index + 1); + + let region = wgt::BufferSize::new(src_buffer_size).map(|size| hal::BufferCopy { + src_offset: 0, + dst_offset: buffer_offset, + size, + }); + let barriers = iter::once(hal::BufferBarrier { + buffer: &staging_buffer.raw, + usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, + }) + .chain(transition.map(|pending| pending.into_hal(dst))); + let encoder = device.pending_writes.activate(); + unsafe { + encoder.transition_buffers(barriers); + encoder.copy_buffer_to_buffer(&staging_buffer.raw, dst_raw, region.into_iter()); + } + + device.pending_writes.dst_buffers.insert(buffer_id); + + // Ensure the overwritten bytes are marked as initialized so + // they don't need to be nulled prior to mapping or binding. 
+ { + drop(buffer_guard); + let mut buffer_guard = hub.buffers.write(device_token).0; + + let dst = buffer_guard.get_mut(buffer_id).unwrap(); + dst.initialization_status + .drain(buffer_offset..(buffer_offset + src_buffer_size)); + } + + Ok(()) + } + + pub fn queue_write_texture<A: HalApi>( + &self, + queue_id: id::QueueId, + destination: &ImageCopyTexture, + data: &[u8], + data_layout: &wgt::ImageDataLayout, + size: &wgt::Extent3d, + ) -> Result<(), QueueWriteError> { + profiling::scope!("Queue::write_texture"); + + let hub = A::hub(self); + let mut token = Token::root(); + let (mut device_guard, mut token) = hub.devices.write(&mut token); + let device = device_guard + .get_mut(queue_id) + .map_err(|_| DeviceError::Invalid)?; + + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + let mut trace = trace.lock(); + let data_path = trace.make_binary("bin", data); + trace.add(Action::WriteTexture { + to: destination.clone(), + data: data_path, + layout: *data_layout, + size: *size, + }); + } + + if size.width == 0 || size.height == 0 || size.depth_or_array_layers == 0 { + log::trace!("Ignoring write_texture of size 0"); + return Ok(()); + } + + let (mut texture_guard, _) = hub.textures.write(&mut token); // For clear we need write access to the texture. TODO: Can we acquire write lock later? + let dst = texture_guard.get_mut(destination.texture).unwrap(); + + let (selector, dst_base, texture_format) = + extract_texture_selector(destination, size, dst)?; + let format_desc = texture_format.describe(); + + if !dst.desc.usage.contains(wgt::TextureUsages::COPY_DST) { + return Err( + TransferError::MissingCopyDstUsageFlag(None, Some(destination.texture)).into(), + ); + } + + // Note: Doing the copy range validation early is important because ensures that the + // dimensions are not going to cause overflow in other parts of the validation. 
+ let (hal_copy_size, array_layer_count) = + validate_texture_copy_range(destination, &dst.desc, CopySide::Destination, size)?; + + // Note: `_source_bytes_per_array_layer` is ignored since we + // have a staging copy, and it can have a different value. + let (_, _source_bytes_per_array_layer) = validate_linear_texture_data( + data_layout, + texture_format, + data.len() as wgt::BufferAddress, + CopySide::Source, + format_desc.block_size as wgt::BufferAddress, + size, + false, + )?; + + if !conv::is_valid_copy_dst_texture_format(texture_format, destination.aspect) { + return Err(TransferError::CopyToForbiddenTextureFormat { + format: texture_format, + aspect: destination.aspect, + } + .into()); + } + let (block_width, block_height) = format_desc.block_dimensions; + let width_blocks = size.width / block_width as u32; + let height_blocks = size.height / block_height as u32; + + let block_rows_per_image = match data_layout.rows_per_image { + Some(rows_per_image) => rows_per_image.get(), + None => { + // doesn't really matter because we need this only if we copy + // more than one layer, and then we validate for this being not + // None + size.height + } + }; + + let bytes_per_row_alignment = get_lowest_common_denom( + device.alignments.buffer_copy_pitch.get() as u32, + format_desc.block_size as u32, + ); + let stage_bytes_per_row = hal::auxil::align_to( + format_desc.block_size as u32 * width_blocks, + bytes_per_row_alignment, + ); + + let block_rows_in_copy = + (size.depth_or_array_layers - 1) * block_rows_per_image + height_blocks; + let stage_size = stage_bytes_per_row as u64 * block_rows_in_copy as u64; + + if !dst.desc.usage.contains(wgt::TextureUsages::COPY_DST) { + return Err( + TransferError::MissingCopyDstUsageFlag(None, Some(destination.texture)).into(), + ); + } + + let mut trackers = device.trackers.lock(); + let encoder = device.pending_writes.activate(); + + // If the copy does not fully cover the layers, we need to initialize to + // zero *first* as we 
don't keep track of partial texture layer inits. + // + // Strictly speaking we only need to clear the areas of a layer + // untouched, but this would get increasingly messy. + let init_layer_range = if dst.desc.dimension == wgt::TextureDimension::D3 { + // volume textures don't have a layer range as array volumes aren't supported + 0..1 + } else { + destination.origin.z..destination.origin.z + size.depth_or_array_layers + }; + if dst.initialization_status.mips[destination.mip_level as usize] + .check(init_layer_range.clone()) + .is_some() + { + if has_copy_partial_init_tracker_coverage(size, destination.mip_level, &dst.desc) { + for layer_range in dst.initialization_status.mips[destination.mip_level as usize] + .drain(init_layer_range) + .collect::<Vec<std::ops::Range<u32>>>() + { + crate::command::clear_texture( + &*texture_guard, + id::Valid(destination.texture), + TextureInitRange { + mip_range: destination.mip_level..(destination.mip_level + 1), + layer_range, + }, + encoder, + &mut trackers.textures, + &device.alignments, + &device.zero_buffer, + ) + .map_err(QueueWriteError::from)?; + } + } else { + dst.initialization_status.mips[destination.mip_level as usize] + .drain(init_layer_range); + } + } + + let dst = texture_guard.get(destination.texture).unwrap(); + let transition = trackers + .textures + .set_single( + dst, + destination.texture, + selector, + hal::TextureUses::COPY_DST, + ) + .ok_or(TransferError::InvalidTexture(destination.texture))?; + + dst.life_guard.use_at(device.active_submission_index + 1); + + let dst_raw = dst + .inner + .as_raw() + .ok_or(TransferError::InvalidTexture(destination.texture))?; + + let bytes_per_row = if let Some(bytes_per_row) = data_layout.bytes_per_row { + bytes_per_row.get() + } else { + width_blocks * format_desc.block_size as u32 + }; + + // Platform validation requires that the staging buffer always be + // freed, even if an error occurs. All paths from here must call + // `device.pending_writes.consume`. 
+ let (staging_buffer, staging_buffer_ptr) = + prepare_staging_buffer(&mut device.raw, stage_size)?; + + if stage_bytes_per_row == bytes_per_row { + profiling::scope!("copy aligned"); + // Fast path if the data is already being aligned optimally. + unsafe { + ptr::copy_nonoverlapping( + data.as_ptr().offset(data_layout.offset as isize), + staging_buffer_ptr, + stage_size as usize, + ); + } + } else { + profiling::scope!("copy chunked"); + // Copy row by row into the optimal alignment. + let copy_bytes_per_row = stage_bytes_per_row.min(bytes_per_row) as usize; + for layer in 0..size.depth_or_array_layers { + let rows_offset = layer * block_rows_per_image; + for row in 0..height_blocks { + unsafe { + ptr::copy_nonoverlapping( + data.as_ptr().offset( + data_layout.offset as isize + + (rows_offset + row) as isize * bytes_per_row as isize, + ), + staging_buffer_ptr.offset( + (rows_offset + row) as isize * stage_bytes_per_row as isize, + ), + copy_bytes_per_row, + ); + } + } + } + } + + if let Err(e) = unsafe { staging_buffer.flush(&device.raw) } { + device.pending_writes.consume(staging_buffer); + return Err(e.into()); + } + + let regions = (0..array_layer_count).map(|rel_array_layer| { + let mut texture_base = dst_base.clone(); + texture_base.array_layer += rel_array_layer; + hal::BufferTextureCopy { + buffer_layout: wgt::ImageDataLayout { + offset: rel_array_layer as u64 + * block_rows_per_image as u64 + * stage_bytes_per_row as u64, + bytes_per_row: NonZeroU32::new(stage_bytes_per_row), + rows_per_image: NonZeroU32::new(block_rows_per_image), + }, + texture_base, + size: hal_copy_size, + } + }); + let barrier = hal::BufferBarrier { + buffer: &staging_buffer.raw, + usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, + }; + + unsafe { + encoder + .transition_textures(transition.map(|pending| pending.into_hal(dst)).into_iter()); + encoder.transition_buffers(iter::once(barrier)); + encoder.copy_buffer_to_texture(&staging_buffer.raw, dst_raw, regions); + } + + 
device.pending_writes.consume(staging_buffer); + device + .pending_writes + .dst_textures + .insert(destination.texture); + + Ok(()) + } + + pub fn queue_submit<A: HalApi>( + &self, + queue_id: id::QueueId, + command_buffer_ids: &[id::CommandBufferId], + ) -> Result<WrappedSubmissionIndex, QueueSubmitError> { + profiling::scope!("Queue::submit"); + + let (submit_index, callbacks) = { + let hub = A::hub(self); + let mut token = Token::root(); + + let (mut device_guard, mut token) = hub.devices.write(&mut token); + let device = device_guard + .get_mut(queue_id) + .map_err(|_| DeviceError::Invalid)?; + device.temp_suspected.clear(); + device.active_submission_index += 1; + let submit_index = device.active_submission_index; + let mut active_executions = Vec::new(); + let mut used_surface_textures = track::TextureUsageScope::new(); + + { + let (mut command_buffer_guard, mut token) = hub.command_buffers.write(&mut token); + + if !command_buffer_ids.is_empty() { + profiling::scope!("prepare"); + + let (render_bundle_guard, mut token) = hub.render_bundles.read(&mut token); + let (_, mut token) = hub.pipeline_layouts.read(&mut token); + let (bind_group_guard, mut token) = hub.bind_groups.read(&mut token); + let (compute_pipe_guard, mut token) = hub.compute_pipelines.read(&mut token); + let (render_pipe_guard, mut token) = hub.render_pipelines.read(&mut token); + let (mut buffer_guard, mut token) = hub.buffers.write(&mut token); + let (mut texture_guard, mut token) = hub.textures.write(&mut token); + let (texture_view_guard, mut token) = hub.texture_views.read(&mut token); + let (sampler_guard, mut token) = hub.samplers.read(&mut token); + let (query_set_guard, _) = hub.query_sets.read(&mut token); + + //Note: locking the trackers has to be done after the storages + let mut trackers = device.trackers.lock(); + + //TODO: if multiple command buffers are submitted, we can re-use the last + // native command buffer of the previous chain instead of always creating + // a 
temporary one, since the chains are not finished. + + // finish all the command buffers first + for &cmb_id in command_buffer_ids { + // we reset the used surface textures every time we use + // it, so make sure to set_size on it. + used_surface_textures.set_size(texture_guard.len()); + + #[allow(unused_mut)] + let mut cmdbuf = match hub + .command_buffers + .unregister_locked(cmb_id, &mut *command_buffer_guard) + { + Some(cmdbuf) => cmdbuf, + None => continue, + }; + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + trace.lock().add(Action::Submit( + submit_index, + cmdbuf.commands.take().unwrap(), + )); + } + if !cmdbuf.is_finished() { + device.destroy_command_buffer(cmdbuf); + continue; + } + + // optimize the tracked states + // cmdbuf.trackers.optimize(); + + // update submission IDs + for id in cmdbuf.trackers.buffers.used() { + let buffer = &mut buffer_guard[id]; + let raw_buf = match buffer.raw { + Some(ref raw) => raw, + None => { + return Err(QueueSubmitError::DestroyedBuffer(id.0)); + } + }; + if !buffer.life_guard.use_at(submit_index) { + if let BufferMapState::Active { .. } = buffer.map_state { + log::warn!("Dropped buffer has a pending mapping."); + unsafe { device.raw.unmap_buffer(raw_buf) } + .map_err(DeviceError::from)?; + } + device.temp_suspected.buffers.push(id); + } else { + match buffer.map_state { + BufferMapState::Idle => (), + _ => panic!("Buffer {:?} is still mapped", id), + } + } + } + for id in cmdbuf.trackers.textures.used() { + let texture = &mut texture_guard[id]; + let should_extend = match texture.inner { + TextureInner::Native { raw: None } => { + return Err(QueueSubmitError::DestroyedTexture(id.0)); + } + TextureInner::Native { raw: Some(_) } => false, + TextureInner::Surface { + ref mut has_work, .. 
+ } => { + *has_work = true; + true + } + }; + if !texture.life_guard.use_at(submit_index) { + device.temp_suspected.textures.push(id); + } + if should_extend { + unsafe { + let ref_count = cmdbuf.trackers.textures.get_ref_count(id); + used_surface_textures + .merge_single( + &*texture_guard, + id, + None, + ref_count, + hal::TextureUses::PRESENT, + ) + .unwrap(); + }; + } + } + for id in cmdbuf.trackers.views.used() { + if !texture_view_guard[id].life_guard.use_at(submit_index) { + device.temp_suspected.texture_views.push(id); + } + } + for id in cmdbuf.trackers.bind_groups.used() { + let bg = &bind_group_guard[id]; + if !bg.life_guard.use_at(submit_index) { + device.temp_suspected.bind_groups.push(id); + } + // We need to update the submission indices for the contained + // state-less (!) resources as well, so that they don't get + // deleted too early if the parent bind group goes out of scope. + for sub_id in bg.used.views.used() { + texture_view_guard[sub_id].life_guard.use_at(submit_index); + } + for sub_id in bg.used.samplers.used() { + sampler_guard[sub_id].life_guard.use_at(submit_index); + } + } + // assert!(cmdbuf.trackers.samplers.is_empty()); + for id in cmdbuf.trackers.compute_pipelines.used() { + if !compute_pipe_guard[id].life_guard.use_at(submit_index) { + device.temp_suspected.compute_pipelines.push(id); + } + } + for id in cmdbuf.trackers.render_pipelines.used() { + if !render_pipe_guard[id].life_guard.use_at(submit_index) { + device.temp_suspected.render_pipelines.push(id); + } + } + for id in cmdbuf.trackers.query_sets.used() { + if !query_set_guard[id].life_guard.use_at(submit_index) { + device.temp_suspected.query_sets.push(id); + } + } + for id in cmdbuf.trackers.bundles.used() { + let bundle = &render_bundle_guard[id]; + if !bundle.life_guard.use_at(submit_index) { + device.temp_suspected.render_bundles.push(id); + } + // We need to update the submission indices for the contained + // state-less (!) 
resources as well, excluding the bind groups. + // They don't get deleted too early if the bundle goes out of scope. + for sub_id in bundle.used.render_pipelines.used() { + render_pipe_guard[sub_id].life_guard.use_at(submit_index); + } + for sub_id in bundle.used.query_sets.used() { + query_set_guard[sub_id].life_guard.use_at(submit_index); + } + } + + let mut baked = cmdbuf.into_baked(); + // execute resource transitions + unsafe { + baked + .encoder + .begin_encoding(Some("(wgpu internal) Transit")) + .map_err(DeviceError::from)? + }; + log::trace!("Stitching command buffer {:?} before submission", cmb_id); + baked + .initialize_buffer_memory(&mut *trackers, &mut *buffer_guard) + .map_err(|err| QueueSubmitError::DestroyedBuffer(err.0))?; + baked + .initialize_texture_memory(&mut *trackers, &mut *texture_guard, device) + .map_err(|err| QueueSubmitError::DestroyedTexture(err.0))?; + //Note: stateless trackers are not merged: + // device already knows these resources exist. + CommandBuffer::insert_barriers_from_tracker( + &mut baked.encoder, + &mut *trackers, + &baked.trackers, + &*buffer_guard, + &*texture_guard, + ); + + let transit = unsafe { baked.encoder.end_encoding().unwrap() }; + baked.list.insert(0, transit); + + // Transition surface textures into `Present` state. + // Note: we could technically do it after all of the command buffers, + // but here we have a command encoder by hand, so it's easier to use it. + if !used_surface_textures.is_empty() { + unsafe { + baked + .encoder + .begin_encoding(Some("(wgpu internal) Present")) + .map_err(DeviceError::from)? 
+ }; + trackers + .textures + .set_from_usage_scope(&*texture_guard, &used_surface_textures); + let texture_barriers = trackers.textures.drain().map(|pending| { + let tex = unsafe { texture_guard.get_unchecked(pending.id) }; + pending.into_hal(tex) + }); + let present = unsafe { + baked.encoder.transition_textures(texture_barriers); + baked.encoder.end_encoding().unwrap() + }; + baked.list.push(present); + used_surface_textures = track::TextureUsageScope::new(); + } + + // done + active_executions.push(EncoderInFlight { + raw: baked.encoder, + cmd_buffers: baked.list, + }); + } + + log::trace!("Device after submission {}", submit_index); + } + + let super::Device { + ref mut pending_writes, + ref mut queue, + ref mut fence, + .. + } = *device; + + { + // TODO: These blocks have a few organizational issues, and + // should be refactored. + // + // 1) It's similar to the code we have per-command-buffer + // (at the begin and end) Maybe we can merge some? + // + // 2) It's doing the extra locking unconditionally. Maybe we + // can only do so if any surfaces are being written to? + let (_, mut token) = hub.buffers.read(&mut token); // skip token + let (mut texture_guard, _) = hub.textures.write(&mut token); + + used_surface_textures.set_size(texture_guard.len()); + + for &id in pending_writes.dst_textures.iter() { + let texture = texture_guard.get_mut(id).unwrap(); + match texture.inner { + TextureInner::Native { raw: None } => { + return Err(QueueSubmitError::DestroyedTexture(id)); + } + TextureInner::Native { raw: Some(_) } => {} + TextureInner::Surface { + ref mut has_work, .. 
+ } => { + *has_work = true; + let ref_count = texture.life_guard.add_ref(); + unsafe { + used_surface_textures + .merge_single( + &*texture_guard, + id::Valid(id), + None, + &ref_count, + hal::TextureUses::PRESENT, + ) + .unwrap() + }; + } + } + } + + if !used_surface_textures.is_empty() { + let mut trackers = device.trackers.lock(); + + trackers + .textures + .set_from_usage_scope(&*texture_guard, &used_surface_textures); + let texture_barriers = trackers.textures.drain().map(|pending| { + let tex = unsafe { texture_guard.get_unchecked(pending.id) }; + pending.into_hal(tex) + }); + + unsafe { + pending_writes + .command_encoder + .transition_textures(texture_barriers); + }; + } + } + + let refs = pending_writes + .pre_submit() + .into_iter() + .chain( + active_executions + .iter() + .flat_map(|pool_execution| pool_execution.cmd_buffers.iter()), + ) + .collect::<Vec<_>>(); + unsafe { + queue + .submit(&refs, Some((fence, submit_index))) + .map_err(DeviceError::from)?; + } + } + + profiling::scope!("cleanup"); + if let Some(pending_execution) = device.pending_writes.post_submit( + &device.command_allocator, + &device.raw, + &device.queue, + ) { + active_executions.push(pending_execution); + } + + // this will register the new submission to the life time tracker + let mut pending_write_resources = mem::take(&mut device.pending_writes.temp_resources); + device.lock_life(&mut token).track_submission( + submit_index, + pending_write_resources.drain(..), + active_executions, + ); + + // This will schedule destruction of all resources that are no longer needed + // by the user but used in the command stream, among other things. 
+ let (closures, _) = match device.maintain(hub, wgt::Maintain::Poll, &mut token) { + Ok(closures) => closures, + Err(WaitIdleError::Device(err)) => return Err(QueueSubmitError::Queue(err)), + Err(WaitIdleError::StuckGpu) => return Err(QueueSubmitError::StuckGpu), + Err(WaitIdleError::WrongSubmissionIndex(..)) => unreachable!(), + }; + + // pending_write_resources has been drained, so it's empty, but we + // want to retain its heap allocation. + device.pending_writes.temp_resources = pending_write_resources; + device.temp_suspected.clear(); + device.lock_life(&mut token).post_submit(); + + (submit_index, closures) + }; + + // the closures should execute with nothing locked! + callbacks.fire(); + + Ok(WrappedSubmissionIndex { + queue_id, + index: submit_index, + }) + } + + pub fn queue_get_timestamp_period<A: HalApi>( + &self, + queue_id: id::QueueId, + ) -> Result<f32, InvalidQueue> { + let hub = A::hub(self); + let mut token = Token::root(); + let (device_guard, _) = hub.devices.read(&mut token); + match device_guard.get(queue_id) { + Ok(device) => Ok(unsafe { device.queue.get_timestamp_period() }), + Err(_) => Err(InvalidQueue), + } + } + + pub fn queue_on_submitted_work_done<A: HalApi>( + &self, + queue_id: id::QueueId, + closure: SubmittedWorkDoneClosure, + ) -> Result<(), InvalidQueue> { + //TODO: flush pending writes + let closure_opt = { + let hub = A::hub(self); + let mut token = Token::root(); + let (device_guard, mut token) = hub.devices.read(&mut token); + match device_guard.get(queue_id) { + Ok(device) => device.lock_life(&mut token).add_work_done_closure(closure), + Err(_) => return Err(InvalidQueue), + } + }; + if let Some(closure) = closure_opt { + closure.call(); + } + Ok(()) + } +} diff --git a/third_party/rust/wgpu-core/src/device/trace.rs b/third_party/rust/wgpu-core/src/device/trace.rs new file mode 100644 index 0000000000..8892a8e016 --- /dev/null +++ b/third_party/rust/wgpu-core/src/device/trace.rs @@ -0,0 +1,231 @@ +use crate::id; +use 
/// Builds a `RenderBundleEncoderDescriptor` from a render pass context.
///
/// The color formats are borrowed from `context`; the depth/stencil entry
/// is present only when the context has a depth/stencil format, and then
/// carries the two read-only flags passed in. Sample count and multiview
/// are copied from `context` unchanged.
#[cfg(feature = "trace")]
pub(crate) fn new_render_bundle_encoder_descriptor<'a>(
    label: crate::Label<'a>,
    context: &'a super::RenderPassContext,
    depth_read_only: bool,
    stencil_read_only: bool,
) -> crate::command::RenderBundleEncoderDescriptor<'a> {
    let attachments = &context.attachments;
    let depth_stencil = attachments
        .depth_stencil
        .map(|format| wgt::RenderBundleDepthStencil {
            format,
            depth_read_only,
            stencil_read_only,
        });

    crate::command::RenderBundleEncoderDescriptor {
        label,
        color_formats: Cow::Borrowed(&attachments.colors),
        depth_stencil,
        sample_count: context.sample_count,
        multiview: context.multiview,
    }
}
/// One command in a traced command buffer.
///
/// `Action::Submit` carries a `Vec<Command>` alongside the submission
/// index. The type is serializable when the `trace` feature is enabled and
/// deserializable when the `replay` feature is enabled.
#[derive(Debug)]
#[cfg_attr(feature = "trace", derive(serde::Serialize))]
#[cfg_attr(feature = "replay", derive(serde::Deserialize))]
pub enum Command {
    /// Buffer-to-buffer copy of `size` from `src` at `src_offset` into
    /// `dst` at `dst_offset`.
    CopyBufferToBuffer {
        src: id::BufferId,
        src_offset: wgt::BufferAddress,
        dst: id::BufferId,
        dst_offset: wgt::BufferAddress,
        size: wgt::BufferAddress,
    },
    /// Copy of a `size` extent from a buffer into a texture.
    CopyBufferToTexture {
        src: crate::command::ImageCopyBuffer,
        dst: crate::command::ImageCopyTexture,
        size: wgt::Extent3d,
    },
    /// Copy of a `size` extent from a texture into a buffer.
    CopyTextureToBuffer {
        src: crate::command::ImageCopyTexture,
        dst: crate::command::ImageCopyBuffer,
        size: wgt::Extent3d,
    },
    /// Copy of a `size` extent between two textures.
    CopyTextureToTexture {
        src: crate::command::ImageCopyTexture,
        dst: crate::command::ImageCopyTexture,
        size: wgt::Extent3d,
    },
    /// Clear of part of buffer `dst` starting at `offset`.
    // NOTE(review): `size: None` presumably means "to the end of the
    // buffer" — confirm against the clear-buffer implementation.
    ClearBuffer {
        dst: id::BufferId,
        offset: wgt::BufferAddress,
        size: Option<wgt::BufferSize>,
    },
    /// Clear of the given subresource range of texture `dst`.
    ClearTexture {
        dst: id::TextureId,
        subresource_range: wgt::ImageSubresourceRange,
    },
    /// Timestamp write into slot `query_index` of a query set.
    WriteTimestamp {
        query_set_id: id::QuerySetId,
        query_index: u32,
    },
    /// Resolve of `query_count` queries, starting at `start_query`, into
    /// the `destination` buffer at `destination_offset`.
    ResolveQuerySet {
        query_set_id: id::QuerySetId,
        start_query: u32,
        query_count: u32,
        destination: id::BufferId,
        destination_offset: wgt::BufferAddress,
    },
    /// Opens a debug group; the string is presumably its label.
    PushDebugGroup(String),
    /// Closes a debug group.
    PopDebugGroup,
    /// Inserts a debug marker; the string is presumably its label.
    InsertDebugMarker(String),
    /// A whole compute pass, stored as its recorded `BasePass`.
    RunComputePass {
        base: crate::command::BasePass<crate::command::ComputeCommand>,
    },
    /// A whole render pass: its recorded `BasePass` plus the color and
    /// depth/stencil attachments it targets.
    RunRenderPass {
        base: crate::command::BasePass<crate::command::RenderCommand>,
        target_colors: Vec<Option<crate::command::RenderPassColorAttachment>>,
        target_depth_stencil: Option<crate::command::RenderPassDepthStencilAttachment>,
    },
}
std::fs::write(self.path.join(&name), data); + name + } + + pub(crate) fn add(&mut self, action: Action) { + match ron::ser::to_string_pretty(&action, self.config.clone()) { + Ok(string) => { + let _ = writeln!(self.file, "{},", string); + } + Err(e) => { + log::warn!("RON serialization failure: {:?}", e); + } + } + } +} + +#[cfg(feature = "trace")] +impl Drop for Trace { + fn drop(&mut self) { + let _ = self.file.write_all(b"]"); + } +} diff --git a/third_party/rust/wgpu-core/src/error.rs b/third_party/rust/wgpu-core/src/error.rs new file mode 100644 index 0000000000..23a80064f7 --- /dev/null +++ b/third_party/rust/wgpu-core/src/error.rs @@ -0,0 +1,190 @@ +use core::fmt; +use std::error::Error; + +use crate::{ + gfx_select, + hub::{Global, IdentityManagerFactory}, +}; + +pub struct ErrorFormatter<'a> { + writer: &'a mut dyn fmt::Write, + global: &'a Global<IdentityManagerFactory>, +} + +impl<'a> ErrorFormatter<'a> { + pub fn error(&mut self, err: &dyn Error) { + writeln!(self.writer, " {}", err).expect("Error formatting error"); + } + + pub fn note(&mut self, note: &dyn fmt::Display) { + writeln!(self.writer, " note: {}", note).expect("Error formatting error"); + } + + pub fn label(&mut self, label_key: &str, label_value: &str) { + if !label_key.is_empty() && !label_value.is_empty() { + self.note(&format!("{} = `{}`", label_key, label_value)); + } + } + + pub fn bind_group_label(&mut self, id: &crate::id::BindGroupId) { + let global = self.global; + let label = gfx_select!(id => global.bind_group_label(*id)); + self.label("bind group", &label); + } + + pub fn bind_group_layout_label(&mut self, id: &crate::id::BindGroupLayoutId) { + let global = self.global; + let label = gfx_select!(id => global.bind_group_layout_label(*id)); + self.label("bind group layout", &label); + } + + pub fn render_pipeline_label(&mut self, id: &crate::id::RenderPipelineId) { + let global = self.global; + let label = gfx_select!(id => global.render_pipeline_label(*id)); + 
self.label("render pipeline", &label); + } + + pub fn compute_pipeline_label(&mut self, id: &crate::id::ComputePipelineId) { + let global = self.global; + let label = gfx_select!(id => global.compute_pipeline_label(*id)); + self.label("compute pipeline", &label); + } + + pub fn buffer_label_with_key(&mut self, id: &crate::id::BufferId, key: &str) { + let global = self.global; + let label = gfx_select!(id => global.buffer_label(*id)); + self.label(key, &label); + } + + pub fn buffer_label(&mut self, id: &crate::id::BufferId) { + self.buffer_label_with_key(id, "buffer"); + } + + pub fn texture_label_with_key(&mut self, id: &crate::id::TextureId, key: &str) { + let global = self.global; + let label = gfx_select!(id => global.texture_label(*id)); + self.label(key, &label); + } + + pub fn texture_label(&mut self, id: &crate::id::TextureId) { + self.texture_label_with_key(id, "texture"); + } + + pub fn texture_view_label_with_key(&mut self, id: &crate::id::TextureViewId, key: &str) { + let global = self.global; + let label = gfx_select!(id => global.texture_view_label(*id)); + self.label(key, &label); + } + + pub fn texture_view_label(&mut self, id: &crate::id::TextureViewId) { + self.texture_view_label_with_key(id, "texture view"); + } + + pub fn sampler_label(&mut self, id: &crate::id::SamplerId) { + let global = self.global; + let label = gfx_select!(id => global.sampler_label(*id)); + self.label("sampler", &label); + } + + pub fn command_buffer_label(&mut self, id: &crate::id::CommandBufferId) { + let global = self.global; + let label = gfx_select!(id => global.command_buffer_label(*id)); + self.label("command buffer", &label); + } + + pub fn query_set_label(&mut self, id: &crate::id::QuerySetId) { + let global = self.global; + let label = gfx_select!(id => global.query_set_label(*id)); + self.label("query set", &label); + } +} + +pub trait PrettyError: Error + Sized { + fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { + fmt.error(self); + } +} + +pub fn 
format_pretty_any( + writer: &mut dyn fmt::Write, + global: &Global<IdentityManagerFactory>, + error: &(dyn Error + 'static), +) { + let mut fmt = ErrorFormatter { writer, global }; + + if let Some(pretty_err) = error.downcast_ref::<ContextError>() { + return pretty_err.fmt_pretty(&mut fmt); + } + + if let Some(pretty_err) = error.downcast_ref::<crate::command::RenderCommandError>() { + return pretty_err.fmt_pretty(&mut fmt); + } + if let Some(pretty_err) = error.downcast_ref::<crate::binding_model::CreateBindGroupError>() { + return pretty_err.fmt_pretty(&mut fmt); + } + if let Some(pretty_err) = + error.downcast_ref::<crate::binding_model::CreatePipelineLayoutError>() + { + return pretty_err.fmt_pretty(&mut fmt); + } + if let Some(pretty_err) = error.downcast_ref::<crate::command::ExecutionError>() { + return pretty_err.fmt_pretty(&mut fmt); + } + if let Some(pretty_err) = error.downcast_ref::<crate::command::RenderPassErrorInner>() { + return pretty_err.fmt_pretty(&mut fmt); + } + if let Some(pretty_err) = error.downcast_ref::<crate::command::RenderPassError>() { + return pretty_err.fmt_pretty(&mut fmt); + } + if let Some(pretty_err) = error.downcast_ref::<crate::command::ComputePassErrorInner>() { + return pretty_err.fmt_pretty(&mut fmt); + } + if let Some(pretty_err) = error.downcast_ref::<crate::command::ComputePassError>() { + return pretty_err.fmt_pretty(&mut fmt); + } + if let Some(pretty_err) = error.downcast_ref::<crate::command::RenderBundleError>() { + return pretty_err.fmt_pretty(&mut fmt); + } + if let Some(pretty_err) = error.downcast_ref::<crate::command::TransferError>() { + return pretty_err.fmt_pretty(&mut fmt); + } + if let Some(pretty_err) = error.downcast_ref::<crate::command::PassErrorScope>() { + return pretty_err.fmt_pretty(&mut fmt); + } + if let Some(pretty_err) = error.downcast_ref::<crate::track::UsageConflict>() { + return pretty_err.fmt_pretty(&mut fmt); + } + if let Some(pretty_err) = 
error.downcast_ref::<crate::command::QueryError>() { + return pretty_err.fmt_pretty(&mut fmt); + } + + // default + fmt.error(error) +} + +#[derive(Debug)] +pub struct ContextError { + pub string: &'static str, + pub cause: Box<dyn Error + Send + Sync + 'static>, + pub label_key: &'static str, + pub label: String, +} + +impl PrettyError for ContextError { + fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { + fmt.error(self); + fmt.label(self.label_key, &self.label); + } +} + +impl fmt::Display for ContextError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "In {}", self.string) + } +} + +impl Error for ContextError { + fn source(&self) -> Option<&(dyn Error + 'static)> { + Some(self.cause.as_ref()) + } +} diff --git a/third_party/rust/wgpu-core/src/hub.rs b/third_party/rust/wgpu-core/src/hub.rs new file mode 100644 index 0000000000..290df3c716 --- /dev/null +++ b/third_party/rust/wgpu-core/src/hub.rs @@ -0,0 +1,1403 @@ +/*! Allocating resource ids, and tracking the resources they refer to. + +The `wgpu_core` API uses identifiers of type [`Id<R>`] to refer to +resources of type `R`. For example, [`id::DeviceId`] is an alias for +`Id<Device<Empty>>`, and [`id::BufferId`] is an alias for +`Id<Buffer<Empty>>`. `Id` implements `Copy`, `Hash`, `Eq`, `Ord`, and +of course `Debug`. + +Each `Id` contains not only an index for the resource it denotes but +also a [`Backend`] indicating which `wgpu` backend it belongs to. You +can use the [`gfx_select`] macro to dynamically dispatch on an id's +backend to a function specialized at compile time for a specific +backend. See that macro's documentation for details. + +`Id`s also incorporate a generation number, for additional validation. + +The resources to which identifiers refer are freed explicitly. +Attempting to use an identifier for a resource that has been freed +elicits an error result. 
+ +## Assigning ids to resources + +The users of `wgpu_core` generally want resource ids to be assigned +in one of two ways: + +- Users like `wgpu` want `wgpu_core` to assign ids to resources itself. + For example, `wgpu` expects to call `Global::device_create_buffer` + and have the return value indicate the newly created buffer's id. + +- Users like `player` and Firefox want to allocate ids themselves, and + pass `Global::device_create_buffer` and friends the id to assign the + new resource. + +To accommodate either pattern, `wgpu_core` methods that create +resources all expect an `id_in` argument that the caller can use to +specify the id, and they all return the id used. For example, the +declaration of `Global::device_create_buffer` looks like this: + +```ignore +impl<G: GlobalIdentityHandlerFactory> Global<G> { + /* ... */ + pub fn device_create_buffer<A: HalApi>( + &self, + device_id: id::DeviceId, + desc: &resource::BufferDescriptor, + id_in: Input<G, id::BufferId>, + ) -> (id::BufferId, Option<resource::CreateBufferError>) { + /* ... */ + } + /* ... */ +} +``` + +Users that want to assign resource ids themselves pass in the id they +want as the `id_in` argument, whereas users that want `wgpu_core` +itself to choose ids always pass `()`. In either case, the id +ultimately assigned is returned as the first element of the tuple. + +Producing true identifiers from `id_in` values is the job of an +[`IdentityHandler`] implementation, which has an associated type +[`Input`] saying what type of `id_in` values it accepts, and a +[`process`] method that turns such values into true identifiers of +type `I`. There are two kinds of `IdentityHandler`s: + +- Users that want `wgpu_core` to assign ids generally use + [`IdentityManager`] ([wrapped in a mutex]). Its `Input` type is + `()`, and it tracks assigned ids and generation numbers as + necessary. (This is what `wgpu` does.) 
+ +- Users that want to assign ids themselves use an `IdentityHandler` + whose `Input` type is `I` itself, and whose `process` method simply + passes the `id_in` argument through unchanged. For example, the + `player` crate uses an `IdentityPassThrough` type whose `process` + method simply adjusts the id's backend (since recordings can be + replayed on a different backend than the one they were created on) + but passes the rest of the id's content through unchanged. + +Because an `IdentityHandler<I>` can only create ids for a single +resource type `I`, constructing a [`Global`] entails constructing a +separate `IdentityHandler<I>` for each resource type `I` that the +`Global` will manage: an `IdentityHandler<DeviceId>`, an +`IdentityHandler<TextureId>`, and so on. + +The [`Global::new`] function could simply take a large collection of +`IdentityHandler<I>` implementations as arguments, but that would be +ungainly. Instead, `Global::new` expects a `factory` argument that +implements the [`GlobalIdentityHandlerFactory`] trait, which extends +[`IdentityHandlerFactory<I>`] for each resource id type `I`. This +trait, in turn, has a `spawn` method that constructs an +`IdentityHandler<I>` for the `Global` to use. + +What this means is that the types of resource creation functions' +`id_in` arguments depend on the `Global`'s `G` type parameter. A +`Global<G>`'s `IdentityHandler<I>` implementation is: + +```ignore +<G as IdentityHandlerFactory<I>>::Filter +``` + +where `Filter` is an associated type of the `IdentityHandlerFactory` trait. +Thus, its `id_in` type is: + +```ignore +<<G as IdentityHandlerFactory<I>>::Filter as IdentityHandler<I>>::Input +``` + +The [`Input<G, I>`] type is an alias for this construction. + +## Id allocation and streaming + +Perhaps surprisingly, allowing users to assign resource ids themselves +enables major performance improvements in some applications. + +The `wgpu_core` API is designed for use by Firefox's [WebGPU] +implementation. 
For security, web content and GPU use must be kept +segregated in separate processes, with all interaction between them +mediated by an inter-process communication protocol. As web content uses +the WebGPU API, the content process sends messages to the GPU process, +which interacts with the platform's GPU APIs on content's behalf, +occasionally sending results back. + +In a classic Rust API, a resource allocation function takes parameters +describing the resource to create, and if creation succeeds, it returns +the resource id in a `Result::Ok` value. However, this design is a poor +fit for the split-process design described above: content must wait for +the reply to its buffer-creation message (say) before it can know which +id it can use in the next message that uses that buffer. On a common +usage pattern, the classic Rust design imposes the latency of a full +cross-process round trip. + +We can avoid incurring these round-trip latencies simply by letting the +content process assign resource ids itself. With this approach, content +can choose an id for the new buffer, send a message to create the +buffer, and then immediately send the next message operating on that +buffer, since it already knows its id. Allowing content and GPU process +activity to be pipelined greatly improves throughput. + +To help propagate errors correctly in this style of usage, when resource +creation fails, the id supplied for that resource is marked to indicate +as much, allowing subsequent operations using that id to be properly +flagged as errors as well. 
+ +[`gfx_select`]: crate::gfx_select +[`Input`]: IdentityHandler::Input +[`process`]: IdentityHandler::process +[`Id<R>`]: crate::id::Id +[wrapped in a mutex]: trait.IdentityHandler.html#impl-IdentityHandler%3CI%3E-for-Mutex%3CIdentityManager%3E +[WebGPU]: https://www.w3.org/TR/webgpu/ + +*/ + +use crate::{ + binding_model::{BindGroup, BindGroupLayout, PipelineLayout}, + command::{CommandBuffer, RenderBundle}, + device::Device, + id, + instance::{Adapter, HalSurface, Instance, Surface}, + pipeline::{ComputePipeline, RenderPipeline, ShaderModule}, + resource::{Buffer, QuerySet, Sampler, StagingBuffer, Texture, TextureClearMode, TextureView}, + Epoch, Index, +}; + +use parking_lot::{Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard}; +use wgt::Backend; + +#[cfg(debug_assertions)] +use std::cell::Cell; +use std::{fmt::Debug, marker::PhantomData, mem, ops}; + +/// A simple structure to allocate [`Id`] identifiers. +/// +/// Calling [`alloc`] returns a fresh, never-before-seen id. Calling [`free`] +/// marks an id as dead; it will never be returned again by `alloc`. +/// +/// Use `IdentityManager::default` to construct new instances. +/// +/// `IdentityManager` returns `Id`s whose index values are suitable for use as +/// indices into a `Storage<T>` that holds those ids' referents: +/// +/// - Every live id has a distinct index value. Each live id's index selects a +/// distinct element in the vector. +/// +/// - `IdentityManager` prefers low index numbers. If you size your vector to +/// accommodate the indices produced here, the vector's length will reflect +/// the highwater mark of actual occupancy. +/// +/// - `IdentityManager` reuses the index values of freed ids before returning +/// ids with new index values. Freed vector entries get reused. +/// +/// See the module-level documentation for an overview of how this +/// fits together. 
+/// +/// [`Id`]: crate::id::Id +/// [`Backend`]: wgt::Backend; +/// [`alloc`]: IdentityManager::alloc +/// [`free`]: IdentityManager::free +#[derive(Debug, Default)] +pub struct IdentityManager { + /// Available index values. If empty, then `epochs.len()` is the next index + /// to allocate. + free: Vec<Index>, + + /// The next or currently-live epoch value associated with each `Id` index. + /// + /// If there is a live id with index `i`, then `epochs[i]` is its epoch; any + /// id with the same index but an older epoch is dead. + /// + /// If index `i` is currently unused, `epochs[i]` is the epoch to use in its + /// next `Id`. + epochs: Vec<Epoch>, +} + +impl IdentityManager { + /// Allocate a fresh, never-before-seen id with the given `backend`. + /// + /// The backend is incorporated into the id, so that ids allocated with + /// different `backend` values are always distinct. + pub fn alloc<I: id::TypedId>(&mut self, backend: Backend) -> I { + match self.free.pop() { + Some(index) => I::zip(index, self.epochs[index as usize], backend), + None => { + let epoch = 1; + let id = I::zip(self.epochs.len() as Index, epoch, backend); + self.epochs.push(epoch); + id + } + } + } + + /// Free `id`. It will never be returned from `alloc` again. + pub fn free<I: id::TypedId + Debug>(&mut self, id: I) { + let (index, epoch, _backend) = id.unzip(); + let pe = &mut self.epochs[index as usize]; + assert_eq!(*pe, epoch); + // If the epoch reaches EOL, the index doesn't go + // into the free list, will never be reused again. + if epoch < id::EPOCH_MASK { + *pe = epoch + 1; + self.free.push(index); + } + } +} + +/// An entry in a `Storage::map` table. +#[derive(Debug)] +enum Element<T> { + /// There are no live ids with this index. + Vacant, + + /// There is one live id with this index, allocated at the given + /// epoch. + Occupied(T, Epoch), + + /// Like `Occupied`, but an error occurred when creating the + /// resource. 
+ /// + /// The given `String` is the resource's descriptor label. + Error(Epoch, String), +} + +#[derive(Clone, Debug, Default)] +pub struct StorageReport { + pub num_occupied: usize, + pub num_vacant: usize, + pub num_error: usize, + pub element_size: usize, +} + +impl StorageReport { + pub fn is_empty(&self) -> bool { + self.num_occupied + self.num_vacant + self.num_error == 0 + } +} + +#[derive(Clone, Debug)] +pub(crate) struct InvalidId; + +/// A table of `T` values indexed by the id type `I`. +/// +/// The table is represented as a vector indexed by the ids' index +/// values, so you should use an id allocator like `IdentityManager` +/// that keeps the index values dense and close to zero. +#[derive(Debug)] +pub struct Storage<T, I: id::TypedId> { + map: Vec<Element<T>>, + kind: &'static str, + _phantom: PhantomData<I>, +} + +impl<T, I: id::TypedId> ops::Index<id::Valid<I>> for Storage<T, I> { + type Output = T; + fn index(&self, id: id::Valid<I>) -> &T { + self.get(id.0).unwrap() + } +} + +impl<T, I: id::TypedId> ops::IndexMut<id::Valid<I>> for Storage<T, I> { + fn index_mut(&mut self, id: id::Valid<I>) -> &mut T { + self.get_mut(id.0).unwrap() + } +} + +impl<T, I: id::TypedId> Storage<T, I> { + pub(crate) fn contains(&self, id: I) -> bool { + let (index, epoch, _) = id.unzip(); + match self.map.get(index as usize) { + Some(&Element::Vacant) => false, + Some(&Element::Occupied(_, storage_epoch) | &Element::Error(storage_epoch, _)) => { + storage_epoch == epoch + } + None => false, + } + } + + /// Attempts to get a reference to an item behind a potentially invalid ID. + /// + /// Returns [`None`] if there is an epoch mismatch, or the entry is empty. + /// + /// This function is primarily intended for the `as_hal` family of functions + /// where you may need to fallibly get a object backed by an id that could + /// be in a different hub. 
+ pub(crate) fn try_get(&self, id: I) -> Result<Option<&T>, InvalidId> { + let (index, epoch, _) = id.unzip(); + let (result, storage_epoch) = match self.map.get(index as usize) { + Some(&Element::Occupied(ref v, epoch)) => (Ok(Some(v)), epoch), + Some(&Element::Vacant) => return Ok(None), + Some(&Element::Error(epoch, ..)) => (Err(InvalidId), epoch), + None => return Err(InvalidId), + }; + assert_eq!( + epoch, storage_epoch, + "{}[{}] is no longer alive", + self.kind, index + ); + result + } + + /// Get a reference to an item behind a potentially invalid ID. + /// Panics if there is an epoch mismatch, or the entry is empty. + pub(crate) fn get(&self, id: I) -> Result<&T, InvalidId> { + let (index, epoch, _) = id.unzip(); + let (result, storage_epoch) = match self.map.get(index as usize) { + Some(&Element::Occupied(ref v, epoch)) => (Ok(v), epoch), + Some(&Element::Vacant) => panic!("{}[{}] does not exist", self.kind, index), + Some(&Element::Error(epoch, ..)) => (Err(InvalidId), epoch), + None => return Err(InvalidId), + }; + assert_eq!( + epoch, storage_epoch, + "{}[{}] is no longer alive", + self.kind, index + ); + result + } + + /// Get a mutable reference to an item behind a potentially invalid ID. + /// Panics if there is an epoch mismatch, or the entry is empty. 
+ pub(crate) fn get_mut(&mut self, id: I) -> Result<&mut T, InvalidId> { + let (index, epoch, _) = id.unzip(); + let (result, storage_epoch) = match self.map.get_mut(index as usize) { + Some(&mut Element::Occupied(ref mut v, epoch)) => (Ok(v), epoch), + Some(&mut Element::Vacant) | None => panic!("{}[{}] does not exist", self.kind, index), + Some(&mut Element::Error(epoch, ..)) => (Err(InvalidId), epoch), + }; + assert_eq!( + epoch, storage_epoch, + "{}[{}] is no longer alive", + self.kind, index + ); + result + } + + pub(crate) unsafe fn get_unchecked(&self, id: u32) -> &T { + match self.map[id as usize] { + Element::Occupied(ref v, _) => v, + Element::Vacant => panic!("{}[{}] does not exist", self.kind, id), + Element::Error(_, _) => panic!(""), + } + } + + pub(crate) fn label_for_invalid_id(&self, id: I) -> &str { + let (index, _, _) = id.unzip(); + match self.map.get(index as usize) { + Some(&Element::Error(_, ref label)) => label, + _ => "", + } + } + + fn insert_impl(&mut self, index: usize, element: Element<T>) { + if index >= self.map.len() { + self.map.resize_with(index + 1, || Element::Vacant); + } + match std::mem::replace(&mut self.map[index], element) { + Element::Vacant => {} + _ => panic!("Index {:?} is already occupied", index), + } + } + + pub(crate) fn insert(&mut self, id: I, value: T) { + let (index, epoch, _) = id.unzip(); + self.insert_impl(index as usize, Element::Occupied(value, epoch)) + } + + pub(crate) fn insert_error(&mut self, id: I, label: &str) { + let (index, epoch, _) = id.unzip(); + self.insert_impl(index as usize, Element::Error(epoch, label.to_string())) + } + + pub(crate) fn force_replace(&mut self, id: I, value: T) { + let (index, epoch, _) = id.unzip(); + self.map[index as usize] = Element::Occupied(value, epoch); + } + + pub(crate) fn remove(&mut self, id: I) -> Option<T> { + let (index, epoch, _) = id.unzip(); + match std::mem::replace(&mut self.map[index as usize], Element::Vacant) { + Element::Occupied(value, 
storage_epoch) => { + assert_eq!(epoch, storage_epoch); + Some(value) + } + Element::Error(..) => None, + Element::Vacant => panic!("Cannot remove a vacant resource"), + } + } + + // Prevents panic on out of range access, allows Vacant elements. + pub(crate) fn _try_remove(&mut self, id: I) -> Option<T> { + let (index, epoch, _) = id.unzip(); + if index as usize >= self.map.len() { + None + } else if let Element::Occupied(value, storage_epoch) = + std::mem::replace(&mut self.map[index as usize], Element::Vacant) + { + assert_eq!(epoch, storage_epoch); + Some(value) + } else { + None + } + } + + pub(crate) fn iter(&self, backend: Backend) -> impl Iterator<Item = (I, &T)> { + self.map + .iter() + .enumerate() + .filter_map(move |(index, x)| match *x { + Element::Occupied(ref value, storage_epoch) => { + Some((I::zip(index as Index, storage_epoch, backend), value)) + } + _ => None, + }) + } + + pub(crate) fn len(&self) -> usize { + self.map.len() + } + + fn generate_report(&self) -> StorageReport { + let mut report = StorageReport { + element_size: mem::size_of::<T>(), + ..Default::default() + }; + for element in self.map.iter() { + match *element { + Element::Occupied(..) => report.num_occupied += 1, + Element::Vacant => report.num_vacant += 1, + Element::Error(..) => report.num_error += 1, + } + } + report + } +} + +/// Type system for enforcing the lock order on shared HUB structures. +/// If type A implements `Access<B>`, that means we are allowed to proceed +/// with locking resource `B` after we lock `A`. +/// +/// The implementations basically describe the edges in a directed graph +/// of lock transitions. As long as it doesn't have loops, we can have +/// multiple concurrent paths on this graph (from multiple threads) without +/// deadlocks, i.e. there is always a path whose next resource is not locked +/// by some other path, at any time. +pub trait Access<A> {} + +pub enum Root {} +//TODO: establish an order instead of declaring all the pairs. 
// Lock-order edges. Read `impl Access<B> for X` as: while holding a token for
// `X`, it is permitted to lock `B` next. The impls below are grouped by the
// resource being locked (`B`).

// Instance and surfaces.
impl Access<Instance> for Root {}
impl Access<Surface> for Root {}
impl Access<Surface> for Instance {}
// Adapters.
impl<A: HalApi> Access<Adapter<A>> for Root {}
impl<A: HalApi> Access<Adapter<A>> for Surface {}
// Devices.
impl<A: HalApi> Access<Device<A>> for Root {}
impl<A: HalApi> Access<Device<A>> for Surface {}
impl<A: HalApi> Access<Device<A>> for Adapter<A> {}
// Pipeline layouts.
impl<A: HalApi> Access<PipelineLayout<A>> for Root {}
impl<A: HalApi> Access<PipelineLayout<A>> for Device<A> {}
impl<A: HalApi> Access<PipelineLayout<A>> for RenderBundle<A> {}
// Bind group layouts.
impl<A: HalApi> Access<BindGroupLayout<A>> for Root {}
impl<A: HalApi> Access<BindGroupLayout<A>> for Device<A> {}
impl<A: HalApi> Access<BindGroupLayout<A>> for PipelineLayout<A> {}
// Bind groups.
impl<A: HalApi> Access<BindGroup<A>> for Root {}
impl<A: HalApi> Access<BindGroup<A>> for Device<A> {}
impl<A: HalApi> Access<BindGroup<A>> for BindGroupLayout<A> {}
impl<A: HalApi> Access<BindGroup<A>> for PipelineLayout<A> {}
impl<A: HalApi> Access<BindGroup<A>> for CommandBuffer<A> {}
// Command buffers.
impl<A: HalApi> Access<CommandBuffer<A>> for Root {}
impl<A: HalApi> Access<CommandBuffer<A>> for Device<A> {}
// Render bundles.
impl<A: HalApi> Access<RenderBundle<A>> for Device<A> {}
impl<A: HalApi> Access<RenderBundle<A>> for CommandBuffer<A> {}
// Compute pipelines.
impl<A: HalApi> Access<ComputePipeline<A>> for Device<A> {}
impl<A: HalApi> Access<ComputePipeline<A>> for BindGroup<A> {}
// Render pipelines.
impl<A: HalApi> Access<RenderPipeline<A>> for Device<A> {}
impl<A: HalApi> Access<RenderPipeline<A>> for BindGroup<A> {}
impl<A: HalApi> Access<RenderPipeline<A>> for ComputePipeline<A> {}
// Query sets.
impl<A: HalApi> Access<QuerySet<A>> for Root {}
impl<A: HalApi> Access<QuerySet<A>> for Device<A> {}
impl<A: HalApi> Access<QuerySet<A>> for CommandBuffer<A> {}
impl<A: HalApi> Access<QuerySet<A>> for RenderPipeline<A> {}
impl<A: HalApi> Access<QuerySet<A>> for ComputePipeline<A> {}
impl<A: HalApi> Access<QuerySet<A>> for Sampler<A> {}
// Shader modules.
impl<A: HalApi> Access<ShaderModule<A>> for Device<A> {}
impl<A: HalApi> Access<ShaderModule<A>> for BindGroupLayout<A> {}
// Buffers.
impl<A: HalApi> Access<Buffer<A>> for Root {}
impl<A: HalApi> Access<Buffer<A>> for Device<A> {}
impl<A: HalApi> Access<Buffer<A>> for BindGroupLayout<A> {}
impl<A: HalApi> Access<Buffer<A>> for BindGroup<A> {}
impl<A: HalApi> Access<Buffer<A>> for CommandBuffer<A> {}
impl<A: HalApi> Access<Buffer<A>> for ComputePipeline<A> {}
impl<A: HalApi> Access<Buffer<A>> for RenderPipeline<A> {}
impl<A: HalApi> Access<Buffer<A>> for QuerySet<A> {}
// Staging buffers.
impl<A: HalApi> Access<StagingBuffer<A>> for Device<A> {}
// Textures.
impl<A: HalApi> Access<Texture<A>> for Root {}
impl<A: HalApi> Access<Texture<A>> for Device<A> {}
impl<A: HalApi> Access<Texture<A>> for Buffer<A> {}
// Texture views.
impl<A: HalApi> Access<TextureView<A>> for Root {}
impl<A: HalApi> Access<TextureView<A>> for Device<A> {}
impl<A: HalApi> Access<TextureView<A>> for Texture<A> {}
// Samplers.
impl<A: HalApi> Access<Sampler<A>> for Root {}
impl<A: HalApi> Access<Sampler<A>> for Device<A> {}
impl<A: HalApi> Access<Sampler<A>> for TextureView<A> {}

// Debug-only, per-thread count of live `Token`s: `Token::root` sets it to 1,
// `Token::new` increments it, and dropping any token decrements it.
#[cfg(debug_assertions)]
thread_local! {
    static ACTIVE_TOKEN: Cell<u8> = Cell::new(0);
}

/// A permission token to lock resource `T` or anything after it,
/// as defined by the `Access` implementations.
///
/// Note: there can only be one non-borrowed `Token` alive on a thread
/// at a time, which is enforced by `ACTIVE_TOKEN`.
+pub(crate) struct Token<'a, T: 'a> { + level: PhantomData<&'a T>, +} + +impl<'a, T> Token<'a, T> { + fn new() -> Self { + #[cfg(debug_assertions)] + ACTIVE_TOKEN.with(|active| { + let old = active.get(); + assert_ne!(old, 0, "Root token was dropped"); + active.set(old + 1); + }); + Self { level: PhantomData } + } +} + +impl Token<'static, Root> { + pub fn root() -> Self { + #[cfg(debug_assertions)] + ACTIVE_TOKEN.with(|active| { + assert_eq!(0, active.replace(1), "Root token is already active"); + }); + + Self { level: PhantomData } + } +} + +impl<'a, T> Drop for Token<'a, T> { + fn drop(&mut self) { + #[cfg(debug_assertions)] + ACTIVE_TOKEN.with(|active| { + let old = active.get(); + active.set(old - 1); + }); + } +} + +/// A type that can build true ids from proto-ids, and free true ids. +/// +/// For some implementations, the true id is based on the proto-id. +/// The caller is responsible for providing well-allocated proto-ids. +/// +/// For other implementations, the proto-id carries no information +/// (it's `()`, say), and this `IdentityHandler` type takes care of +/// allocating a fresh true id. +/// +/// See the module-level documentation for details. +pub trait IdentityHandler<I>: Debug { + /// The type of proto-id consumed by this filter, to produce a true id. + type Input: Clone + Debug; + + /// Given a proto-id value `id`, return a true id for `backend`. + fn process(&self, id: Self::Input, backend: Backend) -> I; + + /// Free the true id `id`. + fn free(&self, id: I); +} + +impl<I: id::TypedId + Debug> IdentityHandler<I> for Mutex<IdentityManager> { + type Input = (); + fn process(&self, _id: Self::Input, backend: Backend) -> I { + self.lock().alloc(backend) + } + fn free(&self, id: I) { + self.lock().free(id) + } +} + +/// A type that can produce [`IdentityHandler`] filters for ids of type `I`. +/// +/// See the module-level documentation for details. +pub trait IdentityHandlerFactory<I> { + /// The type of filter this factory constructs. 
+ /// + /// "Filter" and "handler" seem to both mean the same thing here: + /// something that can produce true ids from proto-ids. + type Filter: IdentityHandler<I>; + + /// Create an [`IdentityHandler<I>`] implementation that can + /// transform proto-ids into ids of type `I`. + /// + /// [`IdentityHandler<I>`]: IdentityHandler + fn spawn(&self) -> Self::Filter; +} + +/// A global identity handler factory based on [`IdentityManager`]. +/// +/// Each of this type's `IdentityHandlerFactory<I>::spawn` methods +/// returns a `Mutex<IdentityManager<I>>`, which allocates fresh `I` +/// ids itself, and takes `()` as its proto-id type. +#[derive(Debug)] +pub struct IdentityManagerFactory; + +impl<I: id::TypedId + Debug> IdentityHandlerFactory<I> for IdentityManagerFactory { + type Filter = Mutex<IdentityManager>; + fn spawn(&self) -> Self::Filter { + Mutex::new(IdentityManager::default()) + } +} + +/// A factory that can build [`IdentityHandler`]s for all resource +/// types. +pub trait GlobalIdentityHandlerFactory: + IdentityHandlerFactory<id::AdapterId> + + IdentityHandlerFactory<id::DeviceId> + + IdentityHandlerFactory<id::PipelineLayoutId> + + IdentityHandlerFactory<id::ShaderModuleId> + + IdentityHandlerFactory<id::BindGroupLayoutId> + + IdentityHandlerFactory<id::BindGroupId> + + IdentityHandlerFactory<id::CommandBufferId> + + IdentityHandlerFactory<id::RenderBundleId> + + IdentityHandlerFactory<id::RenderPipelineId> + + IdentityHandlerFactory<id::ComputePipelineId> + + IdentityHandlerFactory<id::QuerySetId> + + IdentityHandlerFactory<id::BufferId> + + IdentityHandlerFactory<id::StagingBufferId> + + IdentityHandlerFactory<id::TextureId> + + IdentityHandlerFactory<id::TextureViewId> + + IdentityHandlerFactory<id::SamplerId> + + IdentityHandlerFactory<id::SurfaceId> +{ +} + +impl GlobalIdentityHandlerFactory for IdentityManagerFactory {} + +pub type Input<G, I> = <<G as IdentityHandlerFactory<I>>::Filter as IdentityHandler<I>>::Input; + +pub trait Resource { + const 
TYPE: &'static str; + fn life_guard(&self) -> &crate::LifeGuard; + fn label(&self) -> &str { + #[cfg(debug_assertions)] + return &self.life_guard().label; + #[cfg(not(debug_assertions))] + return ""; + } +} + +#[derive(Debug)] +pub struct Registry<T: Resource, I: id::TypedId, F: IdentityHandlerFactory<I>> { + identity: F::Filter, + data: RwLock<Storage<T, I>>, + backend: Backend, +} + +impl<T: Resource, I: id::TypedId, F: IdentityHandlerFactory<I>> Registry<T, I, F> { + fn new(backend: Backend, factory: &F) -> Self { + Self { + identity: factory.spawn(), + data: RwLock::new(Storage { + map: Vec::new(), + kind: T::TYPE, + _phantom: PhantomData, + }), + backend, + } + } + + fn without_backend(factory: &F, kind: &'static str) -> Self { + Self { + identity: factory.spawn(), + data: RwLock::new(Storage { + map: Vec::new(), + kind, + _phantom: PhantomData, + }), + backend: Backend::Empty, + } + } +} + +#[must_use] +pub(crate) struct FutureId<'a, I: id::TypedId, T> { + id: I, + data: &'a RwLock<Storage<T, I>>, +} + +impl<I: id::TypedId + Copy, T> FutureId<'_, I, T> { + #[cfg(feature = "trace")] + pub fn id(&self) -> I { + self.id + } + + pub fn into_id(self) -> I { + self.id + } + + pub fn assign<'a, A: Access<T>>(self, value: T, _: &'a mut Token<A>) -> id::Valid<I> { + self.data.write().insert(self.id, value); + id::Valid(self.id) + } + + pub fn assign_error<'a, A: Access<T>>(self, label: &str, _: &'a mut Token<A>) -> I { + self.data.write().insert_error(self.id, label); + self.id + } +} + +impl<T: Resource, I: id::TypedId + Copy, F: IdentityHandlerFactory<I>> Registry<T, I, F> { + pub(crate) fn prepare( + &self, + id_in: <F::Filter as IdentityHandler<I>>::Input, + ) -> FutureId<I, T> { + FutureId { + id: self.identity.process(id_in, self.backend), + data: &self.data, + } + } + + pub(crate) fn read<'a, A: Access<T>>( + &'a self, + _token: &'a mut Token<A>, + ) -> (RwLockReadGuard<'a, Storage<T, I>>, Token<'a, T>) { + (self.data.read(), Token::new()) + } + + pub(crate) fn 
write<'a, A: Access<T>>( + &'a self, + _token: &'a mut Token<A>, + ) -> (RwLockWriteGuard<'a, Storage<T, I>>, Token<'a, T>) { + (self.data.write(), Token::new()) + } + + pub fn unregister_locked(&self, id: I, guard: &mut Storage<T, I>) -> Option<T> { + let value = guard.remove(id); + //Note: careful about the order here! + self.identity.free(id); + //Returning None is legal if it's an error ID + value + } + + pub(crate) fn unregister<'a, A: Access<T>>( + &self, + id: I, + _token: &'a mut Token<A>, + ) -> (Option<T>, Token<'a, T>) { + let value = self.data.write().remove(id); + //Note: careful about the order here! + self.identity.free(id); + //Returning None is legal if it's an error ID + (value, Token::new()) + } + + pub fn label_for_resource(&self, id: I) -> String { + let guard = self.data.read(); + + let type_name = guard.kind; + match guard.get(id) { + Ok(res) => { + let label = res.label(); + if label.is_empty() { + format!("<{}-{:?}>", type_name, id.unzip()) + } else { + label.to_string() + } + } + Err(_) => format!( + "<Invalid-{} label={}>", + type_name, + guard.label_for_invalid_id(id) + ), + } + } +} + +#[derive(Debug)] +pub struct HubReport { + pub adapters: StorageReport, + pub devices: StorageReport, + pub pipeline_layouts: StorageReport, + pub shader_modules: StorageReport, + pub bind_group_layouts: StorageReport, + pub bind_groups: StorageReport, + pub command_buffers: StorageReport, + pub render_bundles: StorageReport, + pub render_pipelines: StorageReport, + pub compute_pipelines: StorageReport, + pub query_sets: StorageReport, + pub buffers: StorageReport, + pub textures: StorageReport, + pub texture_views: StorageReport, + pub samplers: StorageReport, +} + +impl HubReport { + pub fn is_empty(&self) -> bool { + self.adapters.is_empty() + } +} + +pub struct Hub<A: HalApi, F: GlobalIdentityHandlerFactory> { + pub adapters: Registry<Adapter<A>, id::AdapterId, F>, + pub devices: Registry<Device<A>, id::DeviceId, F>, + pub pipeline_layouts: 
Registry<PipelineLayout<A>, id::PipelineLayoutId, F>, + pub shader_modules: Registry<ShaderModule<A>, id::ShaderModuleId, F>, + pub bind_group_layouts: Registry<BindGroupLayout<A>, id::BindGroupLayoutId, F>, + pub bind_groups: Registry<BindGroup<A>, id::BindGroupId, F>, + pub command_buffers: Registry<CommandBuffer<A>, id::CommandBufferId, F>, + pub render_bundles: Registry<RenderBundle<A>, id::RenderBundleId, F>, + pub render_pipelines: Registry<RenderPipeline<A>, id::RenderPipelineId, F>, + pub compute_pipelines: Registry<ComputePipeline<A>, id::ComputePipelineId, F>, + pub query_sets: Registry<QuerySet<A>, id::QuerySetId, F>, + pub buffers: Registry<Buffer<A>, id::BufferId, F>, + pub staging_buffers: Registry<StagingBuffer<A>, id::StagingBufferId, F>, + pub textures: Registry<Texture<A>, id::TextureId, F>, + pub texture_views: Registry<TextureView<A>, id::TextureViewId, F>, + pub samplers: Registry<Sampler<A>, id::SamplerId, F>, +} + +impl<A: HalApi, F: GlobalIdentityHandlerFactory> Hub<A, F> { + fn new(factory: &F) -> Self { + Self { + adapters: Registry::new(A::VARIANT, factory), + devices: Registry::new(A::VARIANT, factory), + pipeline_layouts: Registry::new(A::VARIANT, factory), + shader_modules: Registry::new(A::VARIANT, factory), + bind_group_layouts: Registry::new(A::VARIANT, factory), + bind_groups: Registry::new(A::VARIANT, factory), + command_buffers: Registry::new(A::VARIANT, factory), + render_bundles: Registry::new(A::VARIANT, factory), + render_pipelines: Registry::new(A::VARIANT, factory), + compute_pipelines: Registry::new(A::VARIANT, factory), + query_sets: Registry::new(A::VARIANT, factory), + buffers: Registry::new(A::VARIANT, factory), + staging_buffers: Registry::new(A::VARIANT, factory), + textures: Registry::new(A::VARIANT, factory), + texture_views: Registry::new(A::VARIANT, factory), + samplers: Registry::new(A::VARIANT, factory), + } + } + + //TODO: instead of having a hacky `with_adapters` parameter, + // we should have 
/// Destroys every resource stored in this hub and then disposes of its devices.
///
/// Each resource storage is drained under its own write lock and the raw HAL
/// object of every occupied slot is destroyed through the device that owns it.
/// The write lock on the device storage is held for the whole teardown.
///
/// * `surface_guard` - the surface storage. Surfaces whose current
///   presentation belongs to backend `A` have that presentation taken and are
///   unconfigured; surfaces of other backends (or without a presentation) are
///   left untouched. The surfaces themselves are not destroyed here.
/// * `with_adapters` - when `true`, the adapter storage is cleared as well,
///   after the devices have been disposed.
fn clear(&self, surface_guard: &mut Storage<Surface, id::SurfaceId>, with_adapters: bool) {
    use crate::resource::TextureInner;
    use hal::{Device as _, Surface as _};

    // Notify every live device before any of its resources are destroyed.
    let mut devices = self.devices.data.write();
    for element in devices.map.iter_mut() {
        if let Element::Occupied(ref mut device, _) = *element {
            device.prepare_to_die();
        }
    }

    // destroy command buffers first, since otherwise DX12 isn't happy
    for element in self.command_buffers.data.write().map.drain(..) {
        if let Element::Occupied(command_buffer, _) = element {
            let device = &devices[command_buffer.device_id.value];
            device.destroy_command_buffer(command_buffer);
        }
    }

    for element in self.samplers.data.write().map.drain(..) {
        if let Element::Occupied(sampler, _) = element {
            unsafe {
                devices[sampler.device_id.value]
                    .raw
                    .destroy_sampler(sampler.raw);
            }
        }
    }

    // Texture views are drained before the textures themselves.
    for element in self.texture_views.data.write().map.drain(..) {
        if let Element::Occupied(texture_view, _) = element {
            let device = &devices[texture_view.device_id.value];
            unsafe {
                device.raw.destroy_texture_view(texture_view.raw);
            }
        }
    }

    for element in self.textures.data.write().map.drain(..) {
        if let Element::Occupied(texture, _) = element {
            let device = &devices[texture.device_id.value];
            // Only `Native` textures that still hold a raw handle are destroyed here.
            if let TextureInner::Native { raw: Some(raw) } = texture.inner {
                unsafe {
                    device.raw.destroy_texture(raw);
                }
            }
            // Views kept by the texture for render-pass clears are stored on the
            // texture itself rather than in `texture_views`, so destroy them here.
            if let TextureClearMode::RenderPass { clear_views, .. } = texture.clear_mode {
                for view in clear_views {
                    unsafe {
                        device.raw.destroy_texture_view(view);
                    }
                }
            }
        }
    }
    for element in self.buffers.data.write().map.drain(..) {
        if let Element::Occupied(buffer, _) = element {
            //TODO: unmap if needed
            devices[buffer.device_id.value].destroy_buffer(buffer);
        }
    }
    for element in self.bind_groups.data.write().map.drain(..) {
        if let Element::Occupied(bind_group, _) = element {
            let device = &devices[bind_group.device_id.value];
            unsafe {
                device.raw.destroy_bind_group(bind_group.raw);
            }
        }
    }

    for element in self.shader_modules.data.write().map.drain(..) {
        if let Element::Occupied(module, _) = element {
            let device = &devices[module.device_id.value];
            unsafe {
                device.raw.destroy_shader_module(module.raw);
            }
        }
    }
    for element in self.bind_group_layouts.data.write().map.drain(..) {
        if let Element::Occupied(bgl, _) = element {
            let device = &devices[bgl.device_id.value];
            unsafe {
                device.raw.destroy_bind_group_layout(bgl.raw);
            }
        }
    }
    for element in self.pipeline_layouts.data.write().map.drain(..) {
        if let Element::Occupied(pipeline_layout, _) = element {
            let device = &devices[pipeline_layout.device_id.value];
            unsafe {
                device.raw.destroy_pipeline_layout(pipeline_layout.raw);
            }
        }
    }
    for element in self.compute_pipelines.data.write().map.drain(..) {
        if let Element::Occupied(pipeline, _) = element {
            let device = &devices[pipeline.device_id.value];
            unsafe {
                device.raw.destroy_compute_pipeline(pipeline.raw);
            }
        }
    }
    for element in self.render_pipelines.data.write().map.drain(..) {
        if let Element::Occupied(pipeline, _) = element {
            let device = &devices[pipeline.device_id.value];
            unsafe {
                device.raw.destroy_render_pipeline(pipeline.raw);
            }
        }
    }

    // Surfaces are shared between backends, so they are iterated in place
    // (not drained) and only the ones presenting through backend `A` are touched.
    for element in surface_guard.map.iter_mut() {
        if let Element::Occupied(ref mut surface, _epoch) = *element {
            // A surface without a presentation is treated as `Backend::Empty`
            // for this comparison and skipped.
            if surface
                .presentation
                .as_ref()
                .map_or(wgt::Backend::Empty, |p| p.backend())
                != A::VARIANT
            {
                continue;
            }
            if let Some(present) = surface.presentation.take() {
                let device = &devices[present.device_id.value];
                let suf = A::get_surface_mut(surface);
                unsafe {
                    suf.unwrap().raw.unconfigure(&device.raw);
                    //TODO: we could destroy the surface here
                }
            }
        }
    }

    for element in self.query_sets.data.write().map.drain(..) {
        if let Element::Occupied(query_set, _) = element {
            let device = &devices[query_set.device_id.value];
            unsafe {
                device.raw.destroy_query_set(query_set.raw);
            }
        }
    }

    // All resources are gone; the devices themselves can be disposed of now.
    for element in devices.map.drain(..) {
        if let Element::Occupied(device, _) = element {
            device.dispose();
        }
    }

    if with_adapters {
        // Release the device storage lock before taking the adapter lock.
        drop(devices);
        self.adapters.data.write().map.clear();
    }
}
Hub::new(factory), + #[cfg(feature = "dx12")] + dx12: Hub::new(factory), + #[cfg(feature = "dx11")] + dx11: Hub::new(factory), + #[cfg(feature = "gles")] + gl: Hub::new(factory), + } + } +} + +#[derive(Debug)] +pub struct GlobalReport { + pub surfaces: StorageReport, + #[cfg(feature = "vulkan")] + pub vulkan: Option<HubReport>, + #[cfg(feature = "metal")] + pub metal: Option<HubReport>, + #[cfg(feature = "dx12")] + pub dx12: Option<HubReport>, + #[cfg(feature = "dx11")] + pub dx11: Option<HubReport>, + #[cfg(feature = "gles")] + pub gl: Option<HubReport>, +} + +pub struct Global<G: GlobalIdentityHandlerFactory> { + pub instance: Instance, + pub surfaces: Registry<Surface, id::SurfaceId, G>, + hubs: Hubs<G>, +} + +impl<G: GlobalIdentityHandlerFactory> Global<G> { + pub fn new(name: &str, factory: G, backends: wgt::Backends) -> Self { + profiling::scope!("Global::new"); + Self { + instance: Instance::new(name, backends), + surfaces: Registry::without_backend(&factory, "Surface"), + hubs: Hubs::new(&factory), + } + } + + /// # Safety + /// + /// Refer to the creation of wgpu-hal Instance for every backend. + pub unsafe fn from_hal_instance<A: HalApi>( + name: &str, + factory: G, + hal_instance: A::Instance, + ) -> Self { + profiling::scope!("Global::new"); + Self { + instance: A::create_instance_from_hal(name, hal_instance), + surfaces: Registry::without_backend(&factory, "Surface"), + hubs: Hubs::new(&factory), + } + } + + /// # Safety + /// + /// - The raw instance handle returned must not be manually destroyed. 
+ pub unsafe fn instance_as_hal<A: HalApi>(&self) -> Option<&A::Instance> { + A::instance_as_hal(&self.instance) + } + + /// # Safety + /// + /// - The raw handles obtained from the Instance must not be manually destroyed + pub unsafe fn from_instance(factory: G, instance: Instance) -> Self { + profiling::scope!("Global::new"); + Self { + instance, + surfaces: Registry::without_backend(&factory, "Surface"), + hubs: Hubs::new(&factory), + } + } + + pub fn clear_backend<A: HalApi>(&self, _dummy: ()) { + let mut surface_guard = self.surfaces.data.write(); + let hub = A::hub(self); + // this is used for tests, which keep the adapter + hub.clear(&mut surface_guard, false); + } + + pub fn generate_report(&self) -> GlobalReport { + GlobalReport { + surfaces: self.surfaces.data.read().generate_report(), + #[cfg(feature = "vulkan")] + vulkan: if self.instance.vulkan.is_some() { + Some(self.hubs.vulkan.generate_report()) + } else { + None + }, + #[cfg(feature = "metal")] + metal: if self.instance.metal.is_some() { + Some(self.hubs.metal.generate_report()) + } else { + None + }, + #[cfg(feature = "dx12")] + dx12: if self.instance.dx12.is_some() { + Some(self.hubs.dx12.generate_report()) + } else { + None + }, + #[cfg(feature = "dx11")] + dx11: if self.instance.dx11.is_some() { + Some(self.hubs.dx11.generate_report()) + } else { + None + }, + #[cfg(feature = "gles")] + gl: if self.instance.gl.is_some() { + Some(self.hubs.gl.generate_report()) + } else { + None + }, + } + } +} + +impl<G: GlobalIdentityHandlerFactory> Drop for Global<G> { + fn drop(&mut self) { + profiling::scope!("Global::drop"); + log::info!("Dropping Global"); + let mut surface_guard = self.surfaces.data.write(); + + // destroy hubs before the instance gets dropped + #[cfg(feature = "vulkan")] + { + self.hubs.vulkan.clear(&mut surface_guard, true); + } + #[cfg(feature = "metal")] + { + self.hubs.metal.clear(&mut surface_guard, true); + } + #[cfg(feature = "dx12")] + { + self.hubs.dx12.clear(&mut 
/// Glue between a `wgpu-hal` backend and the backend-agnostic containers of
/// this crate ([`Instance`], [`Global`], [`Surface`]).
///
/// Every compiled-in backend implements this by selecting "its" field out of
/// the shared containers. The implementation for `hal::api::Empty` panics with
/// `unimplemented!` in every method.
pub trait HalApi: hal::Api {
    /// The [`Backend`] value identifying this API.
    const VARIANT: Backend;
    /// Wraps an existing HAL instance of this backend into an [`Instance`]
    /// named `name`; the fields of all other backends are left at their default.
    fn create_instance_from_hal(name: &str, hal_instance: Self::Instance) -> Instance;
    /// Returns this backend's HAL instance stored in `instance`, if there is one.
    fn instance_as_hal(instance: &Instance) -> Option<&Self::Instance>;
    /// Returns the hub of `global` that belongs to this backend.
    fn hub<G: GlobalIdentityHandlerFactory>(global: &Global<G>) -> &Hub<Self, G>;
    /// Returns this backend's part of `surface`, if there is one.
    fn get_surface(surface: &Surface) -> Option<&HalSurface<Self>>;
    /// Mutable counterpart of [`HalApi::get_surface`].
    fn get_surface_mut(surface: &mut Surface) -> Option<&mut HalSurface<Self>>;
}
/// Metal backend: every accessor selects the `metal` field of the shared
/// containers.
#[cfg(feature = "metal")]
impl HalApi for hal::api::Metal {
    const VARIANT: Backend = Backend::Metal;

    /// Builds an [`Instance`] that only has the Metal slot populated.
    fn create_instance_from_hal(name: &str, hal_instance: Self::Instance) -> Instance {
        let name = String::from(name);
        let metal = Some(hal_instance);
        Instance {
            name,
            metal,
            ..Default::default()
        }
    }

    fn instance_as_hal(instance: &Instance) -> Option<&Self::Instance> {
        let slot = &instance.metal;
        slot.as_ref()
    }

    fn hub<G: GlobalIdentityHandlerFactory>(global: &Global<G>) -> &Hub<Self, G> {
        let hubs = &global.hubs;
        &hubs.metal
    }

    fn get_surface(surface: &Surface) -> Option<&HalSurface<Self>> {
        let slot = &surface.metal;
        slot.as_ref()
    }

    fn get_surface_mut(surface: &mut Surface) -> Option<&mut HalSurface<Self>> {
        let slot = &mut surface.metal;
        slot.as_mut()
    }
}
/// An index whose epoch has already reached `EPOCH_MASK` must not be handed
/// out again once it has been freed.
#[test]
fn test_epoch_end_of_life() {
    use id::TypedId as _;

    // Start with a manager whose only slot (index 0) sits at the last epoch
    // and is currently on the free list.
    let mut manager = IdentityManager::default();
    manager.epochs.push(id::EPOCH_MASK);
    manager.free.push(0);

    // The first allocation still gets index 0 ...
    let first = manager.alloc::<id::BufferId>(Backend::Empty);
    let (first_index, _, _) = first.unzip();
    assert_eq!(first_index, 0);

    // ... but after freeing it, the next allocation gets a new index.
    manager.free(first);
    let second = manager.alloc::<id::BufferId>(Backend::Empty);
    let (second_index, _, _) = second.unzip();
    // confirm that the index 0 is no longer re-used
    assert_eq!(second_index, 1);
}
// Non-zero integer that backs an `Id`: 32 bits wide with the `id32` feature,
// 64 bits wide otherwise.
#[cfg(feature = "id32")]
type NonZeroId = std::num::NonZeroU32;
#[cfg(not(feature = "id32"))]
type NonZeroId = std::num::NonZeroU64;
// Integer type whose width decides how many bits of an id hold the index:
// `u16` with the `id32` feature, the crate-wide `Index` type otherwise.
#[cfg(feature = "id32")]
type ZippedIndex = u16;
#[cfg(not(feature = "id32"))]
type ZippedIndex = Index;

// Layout of a zipped id, from the least significant bit upwards:
//   [ index: INDEX_BITS | epoch: EPOCH_BITS | backend: BACKEND_BITS ]
// which adds up to `INDEX_BITS * 2` bits in total.

// Number of low bits that hold the index.
const INDEX_BITS: usize = std::mem::size_of::<ZippedIndex>() * 8;
// Number of bits that hold the epoch, stored directly above the index.
const EPOCH_BITS: usize = INDEX_BITS - BACKEND_BITS;
// Number of bits that hold the backend tag.
const BACKEND_BITS: usize = 3;
// Bit position at which the backend tag starts.
const BACKEND_SHIFT: usize = INDEX_BITS * 2 - BACKEND_BITS;
/// Mask with the low `EPOCH_BITS` bits set, i.e. the largest epoch that fits
/// into an id.
pub const EPOCH_MASK: u32 = (1 << (EPOCH_BITS)) - 1;
// Backend type parameter used for the resource types named by the public id aliases.
type Dummy = hal::api::Empty;
+/// +/// [`Global`]: crate::hub::Global +/// [`Hub`]: crate::hub::Hub +/// [`Hub<A>`]: crate::hub::Hub +/// [`Storage`]: crate::hub::Storage +/// [`Texture<A>`]: crate::resource::Texture +/// [`Index`]: std::ops::Index +/// [`IndexMut`]: std::ops::IndexMut +/// [`Registry`]: crate::hub::Registry +/// [`Empty`]: hal::api::Empty +#[repr(transparent)] +#[cfg_attr(feature = "trace", derive(serde::Serialize), serde(into = "SerialId"))] +#[cfg_attr( + feature = "replay", + derive(serde::Deserialize), + serde(from = "SerialId") +)] +#[cfg_attr( + all(feature = "serde", not(feature = "trace")), + derive(serde::Serialize) +)] +#[cfg_attr( + all(feature = "serde", not(feature = "replay")), + derive(serde::Deserialize) +)] +pub struct Id<T>(NonZeroId, PhantomData<T>); + +// This type represents Id in a more readable (and editable) way. +#[allow(dead_code)] +#[cfg_attr(feature = "trace", derive(serde::Serialize))] +#[cfg_attr(feature = "replay", derive(serde::Deserialize))] +enum SerialId { + // The only variant forces RON to not ignore "Id" + Id(Index, Epoch, Backend), +} +#[cfg(feature = "trace")] +impl<T> From<Id<T>> for SerialId { + fn from(id: Id<T>) -> Self { + let (index, epoch, backend) = id.unzip(); + Self::Id(index, epoch, backend) + } +} +#[cfg(feature = "replay")] +impl<T> From<SerialId> for Id<T> { + fn from(id: SerialId) -> Self { + match id { + SerialId::Id(index, epoch, backend) => TypedId::zip(index, epoch, backend), + } + } +} + +impl<T> Id<T> { + /// # Safety + /// + /// The raw id must be valid for the type. 
+ pub unsafe fn from_raw(raw: NonZeroId) -> Self { + Self(raw, PhantomData) + } + + #[allow(dead_code)] + pub(crate) fn dummy(index: u32) -> Valid<Self> { + Valid(Id::zip(index, 1, Backend::Empty)) + } + + pub fn backend(self) -> Backend { + match self.0.get() >> (BACKEND_SHIFT) as u8 { + 0 => Backend::Empty, + 1 => Backend::Vulkan, + 2 => Backend::Metal, + 3 => Backend::Dx12, + 4 => Backend::Dx11, + 5 => Backend::Gl, + _ => unreachable!(), + } + } +} + +impl<T> Copy for Id<T> {} + +impl<T> Clone for Id<T> { + fn clone(&self) -> Self { + Self(self.0, PhantomData) + } +} + +impl<T> fmt::Debug for Id<T> { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + self.unzip().fmt(formatter) + } +} + +impl<T> std::hash::Hash for Id<T> { + fn hash<H: std::hash::Hasher>(&self, state: &mut H) { + self.0.hash(state); + } +} + +impl<T> PartialEq for Id<T> { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } +} + +impl<T> Eq for Id<T> {} + +impl<T> PartialOrd for Id<T> { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + self.0.partial_cmp(&other.0) + } +} + +impl<T> Ord for Id<T> { + fn cmp(&self, other: &Self) -> Ordering { + self.0.cmp(&other.0) + } +} + +/// An internal ID that has been checked to point to +/// a valid object in the storages. +#[repr(transparent)] +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] +#[cfg_attr(feature = "trace", derive(serde::Serialize))] +#[cfg_attr(feature = "replay", derive(serde::Deserialize))] +pub(crate) struct Valid<I>(pub I); + +/// Trait carrying methods for direct `Id` access. +/// +/// Most `wgpu-core` clients should not use this trait. Unusual clients that +/// need to construct `Id` values directly, or access their components, like the +/// WGPU recording player, may use this trait to do so. 
+pub trait TypedId: Copy { + fn as_raw(&self) -> NonZeroId; + fn zip(index: Index, epoch: Epoch, backend: Backend) -> Self; + fn unzip(self) -> (Index, Epoch, Backend); + fn into_raw(self) -> NonZeroId; +} + +#[allow(trivial_numeric_casts)] +impl<T> TypedId for Id<T> { + fn as_raw(&self) -> NonZeroId { + self.0 + } + + fn zip(index: Index, epoch: Epoch, backend: Backend) -> Self { + assert_eq!(0, epoch >> EPOCH_BITS); + assert_eq!(0, (index as IdType) >> INDEX_BITS); + let v = index as IdType + | ((epoch as IdType) << INDEX_BITS) + | ((backend as IdType) << BACKEND_SHIFT); + Id(NonZeroId::new(v).unwrap(), PhantomData) + } + + fn unzip(self) -> (Index, Epoch, Backend) { + ( + (self.0.get() as ZippedIndex) as Index, + (((self.0.get() >> INDEX_BITS) as ZippedIndex) & (EPOCH_MASK as ZippedIndex)) as Index, + self.backend(), + ) + } + + fn into_raw(self) -> NonZeroId { + self.0 + } +} + +pub type AdapterId = Id<crate::instance::Adapter<Dummy>>; +pub type SurfaceId = Id<crate::instance::Surface>; +// Device +pub type DeviceId = Id<crate::device::Device<Dummy>>; +pub type QueueId = DeviceId; +// Resource +pub type BufferId = Id<crate::resource::Buffer<Dummy>>; +pub type StagingBufferId = Id<crate::resource::StagingBuffer<Dummy>>; +pub type TextureViewId = Id<crate::resource::TextureView<Dummy>>; +pub type TextureId = Id<crate::resource::Texture<Dummy>>; +pub type SamplerId = Id<crate::resource::Sampler<Dummy>>; +// Binding model +pub type BindGroupLayoutId = Id<crate::binding_model::BindGroupLayout<Dummy>>; +pub type PipelineLayoutId = Id<crate::binding_model::PipelineLayout<Dummy>>; +pub type BindGroupId = Id<crate::binding_model::BindGroup<Dummy>>; +// Pipeline +pub type ShaderModuleId = Id<crate::pipeline::ShaderModule<Dummy>>; +pub type RenderPipelineId = Id<crate::pipeline::RenderPipeline<Dummy>>; +pub type ComputePipelineId = Id<crate::pipeline::ComputePipeline<Dummy>>; +// Command +pub type CommandEncoderId = CommandBufferId; +pub type CommandBufferId = 
/// Zipping and unzipping must round-trip for small, mid-range and maximal
/// index/epoch values on every backend.
#[test]
fn test_id() {
    let max_index = ((1u64 << INDEX_BITS) - 1) as Index;
    let index_samples = [1, max_index / 2 - 1, max_index / 2 + 1, max_index];
    let epoch_samples = [1, EPOCH_MASK / 2 - 1, EPOCH_MASK / 2 + 1, EPOCH_MASK];
    let backend_samples = [
        Backend::Empty,
        Backend::Vulkan,
        Backend::Metal,
        Backend::Dx12,
        Backend::Dx11,
        Backend::Gl,
    ];

    for expected_index in index_samples.iter().copied() {
        for expected_epoch in epoch_samples.iter().copied() {
            for expected_backend in backend_samples.iter().copied() {
                let id: Id<()> = Id::zip(expected_index, expected_epoch, expected_backend);
                let (index, epoch, backend) = id.unzip();
                assert_eq!(index, expected_index);
                assert_eq!(epoch, expected_epoch);
                assert_eq!(backend, expected_backend);
            }
        }
    }
}
/*! Lazy initialization of texture and buffer memory.

The WebGPU specification requires all texture & buffer memory to be
zero initialized on first read. To avoid unnecessary inits, we track
the initialization status of every resource and perform inits lazily.

The granularity is different for buffers and textures:

- Buffer: Byte granularity to support usecases with large, partially
  bound buffers well.

- Texture: Mip-level per layer. That is, a 2D surface is either
  completely initialized or not, subrects are not tracked.

Every use of a buffer/texture generates an `InitTrackerAction`. These
actions are recorded and later resolved at queue submit by merging them
with the current state and each other in execution order.

It is important to note that from the point of view of the memory init
system there are two kinds of writes:

- **Full writes**: Any kind of memcpy operation. These cause a
  `MemoryInitKind::ImplicitlyInitialized` action.

- **(Potentially) partial writes**: For example, write use in a
  Shader. The system is not able to determine if a resource is fully
  initialized afterwards but is no longer allowed to perform any
  clears, therefore this leads to a
  `MemoryInitKind::NeedsInitializedMemory` action, exactly like a read
  would.

 */
/// Yields every uninitialized sub-range that overlaps `drain_range`, clamped
/// to `drain_range`.
///
/// The tracked ranges are not modified while items are being yielded. The
/// call that finds no further overlapping range removes the drained span from
/// `uninitialized_ranges` and returns `None`.
impl<'a, Idx> Iterator for InitTrackerDrain<'a, Idx>
where
    Idx: fmt::Debug + Ord + Copy,
{
    type Item = Range<Idx>;

    fn next(&mut self) -> Option<Self::Item> {
        // Is there another tracked range that starts before the end of the
        // drained span? (`first_index` was chosen so that it ends after the
        // start of the span.)
        if let Some(r) = self
            .uninitialized_ranges
            .get(self.next_index)
            .and_then(|range| {
                if range.start < self.drain_range.end {
                    Some(range.clone())
                } else {
                    None
                }
            })
        {
            self.next_index += 1;
            // Report only the part of the range that lies inside the drained span.
            Some(r.start.max(self.drain_range.start)..r.end.min(self.drain_range.end))
        } else {
            // Nothing left to yield: remove the drained span from the
            // ranges `first_index..next_index` that were visited.
            // NOTE(review): the indices are not reset afterwards, so polling
            // again after this `None` would re-run the cleanup on the already
            // modified list -- presumably callers never do that; confirm.
            let num_affected = self.next_index - self.first_index;
            if num_affected == 0 {
                return None;
            }
            let first_range = &mut self.uninitialized_ranges[self.first_index];

            // Split one "big" uninitialized range?
            if num_affected == 1
                && first_range.start < self.drain_range.start
                && first_range.end > self.drain_range.end
            {
                // The drained span lies strictly inside a single range: keep the
                // part after it in place and insert the part before it in front.
                let old_start = first_range.start;
                first_range.start = self.drain_range.end;
                self.uninitialized_ranges
                    .insert(self.first_index, old_start..self.drain_range.start);
            }
            // Adjust border ranges and delete everything in-between.
            else {
                // The first range is either removed entirely or truncated so
                // that it ends where the drained span starts.
                let remove_start = if first_range.start >= self.drain_range.start {
                    self.first_index
                } else {
                    first_range.end = self.drain_range.start;
                    self.first_index + 1
                };

                // Likewise, the last range is either removed entirely or
                // trimmed so that it starts where the drained span ends.
                let last_range = &mut self.uninitialized_ranges[self.next_index - 1];
                let remove_end = if last_range.end <= self.drain_range.end {
                    self.next_index
                } else {
                    last_range.start = self.drain_range.end;
                    self.next_index - 1
                };

                self.uninitialized_ranges.drain(remove_start..remove_end);
            }

            None
        }
    }
}
+ pub(crate) fn drain(&mut self, drain_range: Range<Idx>) -> InitTrackerDrain<Idx> { + let index = self + .uninitialized_ranges + .partition_point(|r| r.end <= drain_range.start); + InitTrackerDrain { + drain_range, + uninitialized_ranges: &mut self.uninitialized_ranges, + first_index: index, + next_index: index, + } + } +} + +impl InitTracker<u32> { + // Makes a single entry uninitialized if not already uninitialized + #[allow(dead_code)] + pub(crate) fn discard(&mut self, pos: u32) { + // first range where end>=idx + let r_idx = self.uninitialized_ranges.partition_point(|r| r.end < pos); + if let Some(r) = self.uninitialized_ranges.get(r_idx) { + // Extend range at end + if r.end == pos { + // merge with next? + if let Some(right) = self.uninitialized_ranges.get(r_idx + 1) { + if right.start == pos + 1 { + self.uninitialized_ranges[r_idx] = r.start..right.end; + self.uninitialized_ranges.remove(r_idx + 1); + return; + } + } + self.uninitialized_ranges[r_idx] = r.start..(pos + 1); + } else if r.start > pos { + // may still extend range at beginning + if r.start == pos + 1 { + self.uninitialized_ranges[r_idx] = pos..r.end; + } else { + // previous range end must be smaller than idx, therefore no merge possible + self.uninitialized_ranges.push(pos..(pos + 1)); + } + } + } else { + self.uninitialized_ranges.push(pos..(pos + 1)); + } + } +} + +#[cfg(test)] +mod test { + use std::ops::Range; + + type Tracker = super::InitTracker<u32>; + + #[test] + fn check_for_newly_created_tracker() { + let tracker = Tracker::new(10); + assert_eq!(tracker.check(0..10), Some(0..10)); + assert_eq!(tracker.check(0..3), Some(0..3)); + assert_eq!(tracker.check(3..4), Some(3..4)); + assert_eq!(tracker.check(4..10), Some(4..10)); + } + + #[test] + fn check_for_drained_tracker() { + let mut tracker = Tracker::new(10); + tracker.drain(0..10); + assert_eq!(tracker.check(0..10), None); + assert_eq!(tracker.check(0..3), None); + assert_eq!(tracker.check(3..4), None); + 
/// `check` on a tracker with two remaining uninitialized regions (5..10 and
/// 15..20) returns a clamped range that covers the overlapping regions.
#[test]
fn check_for_partially_filled_tracker() {
    let mut tracker = Tracker::new(25);
    // Two regions of uninitialized memory
    tracker.drain(0..5);
    tracker.drain(10..15);
    tracker.drain(20..25);

    let cases: [(Range<u32>, Option<Range<u32>>); 7] = [
        // entire range
        (0..25, Some(5..25)),
        // left non-overlapping
        (0..5, None),
        // left overlapping region
        (3..8, Some(5..8)),
        // left overlapping region + contained region
        (3..17, Some(5..17)),
        // right overlapping region + contained region (yes, doesn't fix range end!)
        (8..22, Some(8..22)),
        // right overlapping region
        (17..22, Some(17..20)),
        // right non-overlapping
        (20..25, None),
    ];

    for (query, expected) in cases.iter() {
        assert_eq!(&tracker.check(query.clone()), expected);
    }
}
/// Discarding an entry directly next to an uninitialized range must grow that
/// range instead of adding a new one.
#[test]
fn discard_extends_ranges() {
    let mut tracker = Tracker::new(10);
    tracker.drain(3..7);

    // Positions 2 and 7 are still uninitialized (inside 0..3 and 7..10), so
    // discarding them must leave the ranges untouched.
    tracker.discard(2);
    tracker.discard(7);
    assert_eq!(tracker.uninitialized_ranges.len(), 2);
    assert_eq!(tracker.uninitialized_ranges[0], 0..3);
    assert_eq!(tracker.uninitialized_ranges[1], 7..10);

    // Positions 3 and 6 are the initialized entries bordering the two ranges:
    // discarding 3 extends the left range at its end, discarding 6 extends
    // the right range at its beginning.
    tracker.discard(3);
    tracker.discard(6);
    assert_eq!(tracker.uninitialized_ranges.len(), 2);
    assert_eq!(tracker.uninitialized_ranges[0], 0..4);
    assert_eq!(tracker.uninitialized_ranges[1], 6..10);
}
layers. We do *not* track volume slices separately. + pub(crate) layer_range: Range<u32>, +} + +// Returns true if a copy operation doesn't fully cover the texture init +// tracking granularity. I.e. if this function returns true for a pending copy +// operation, the target texture needs to be ensured to be initialized first! +pub(crate) fn has_copy_partial_init_tracker_coverage( + copy_size: &wgt::Extent3d, + mip_level: u32, + desc: &wgt::TextureDescriptor<()>, +) -> bool { + let target_size = desc.mip_level_size(mip_level).unwrap(); + copy_size.width != target_size.width + || copy_size.height != target_size.height + || (desc.dimension == wgt::TextureDimension::D3 + && copy_size.depth_or_array_layers != target_size.depth_or_array_layers) +} + +impl From<TextureSelector> for TextureInitRange { + fn from(selector: TextureSelector) -> Self { + TextureInitRange { + mip_range: selector.mips, + layer_range: selector.layers, + } + } +} + +#[derive(Debug, Clone)] +pub(crate) struct TextureInitTrackerAction { + pub(crate) id: TextureId, + pub(crate) range: TextureInitRange, + pub(crate) kind: MemoryInitKind, +} + +pub(crate) type TextureLayerInitTracker = InitTracker<u32>; + +#[derive(Debug)] +pub(crate) struct TextureInitTracker { + pub mips: ArrayVec<TextureLayerInitTracker, { hal::MAX_MIP_LEVELS as usize }>, +} + +impl TextureInitTracker { + pub(crate) fn new(mip_level_count: u32, depth_or_array_layers: u32) -> Self { + TextureInitTracker { + mips: std::iter::repeat(TextureLayerInitTracker::new(depth_or_array_layers)) + .take(mip_level_count as usize) + .collect(), + } + } + + pub(crate) fn check_action( + &self, + action: &TextureInitTrackerAction, + ) -> Option<TextureInitTrackerAction> { + let mut mip_range_start = std::usize::MAX; + let mut mip_range_end = std::usize::MIN; + let mut layer_range_start = std::u32::MAX; + let mut layer_range_end = std::u32::MIN; + + for (i, mip_tracker) in self + .mips + .iter() + .enumerate() + .take(action.range.mip_range.end as 
usize) + .skip(action.range.mip_range.start as usize) + { + if let Some(uninitialized_layer_range) = + mip_tracker.check(action.range.layer_range.clone()) + { + mip_range_start = mip_range_start.min(i); + mip_range_end = i + 1; + layer_range_start = layer_range_start.min(uninitialized_layer_range.start); + layer_range_end = layer_range_end.max(uninitialized_layer_range.end); + }; + } + + if mip_range_start < mip_range_end && layer_range_start < layer_range_end { + Some(TextureInitTrackerAction { + id: action.id, + range: TextureInitRange { + mip_range: mip_range_start as u32..mip_range_end as u32, + layer_range: layer_range_start..layer_range_end, + }, + kind: action.kind, + }) + } else { + None + } + } + + pub(crate) fn discard(&mut self, mip_level: u32, layer: u32) { + self.mips[mip_level as usize].discard(layer); + } +} diff --git a/third_party/rust/wgpu-core/src/instance.rs b/third_party/rust/wgpu-core/src/instance.rs new file mode 100644 index 0000000000..5bbf03543f --- /dev/null +++ b/third_party/rust/wgpu-core/src/instance.rs @@ -0,0 +1,1083 @@ +use crate::{ + device::{Device, DeviceDescriptor}, + hub::{Global, GlobalIdentityHandlerFactory, HalApi, Input, Token}, + id::{AdapterId, DeviceId, SurfaceId, Valid}, + present::Presentation, + LabelHelpers, LifeGuard, Stored, DOWNLEVEL_WARNING_MESSAGE, +}; + +use wgt::{Backend, Backends, PowerPreference}; + +use hal::{Adapter as _, Instance as _}; +use thiserror::Error; + +pub type RequestAdapterOptions = wgt::RequestAdapterOptions<SurfaceId>; +type HalInstance<A> = <A as hal::Api>::Instance; +//TODO: remove this +pub struct HalSurface<A: hal::Api> { + pub raw: A::Surface, + //pub acquired_texture: Option<A::SurfaceTexture>, +} + +#[derive(Clone, Debug, Error)] +#[error("Limit '{name}' value {requested} is better than allowed {allowed}")] +pub struct FailedLimit { + name: &'static str, + requested: u64, + allowed: u64, +} + +fn check_limits(requested: &wgt::Limits, allowed: &wgt::Limits) -> Vec<FailedLimit> { + let 
mut failed = Vec::new(); + + requested.check_limits_with_fail_fn(allowed, false, |name, requested, allowed| { + failed.push(FailedLimit { + name, + requested, + allowed, + }) + }); + + failed +} + +#[test] +fn downlevel_default_limits_less_than_default_limits() { + let res = check_limits(&wgt::Limits::downlevel_defaults(), &wgt::Limits::default()); + assert!( + res.is_empty(), + "Downlevel limits are greater than default limits", + ) +} + +#[derive(Default)] +pub struct Instance { + #[allow(dead_code)] + pub name: String, + #[cfg(feature = "vulkan")] + pub vulkan: Option<HalInstance<hal::api::Vulkan>>, + #[cfg(feature = "metal")] + pub metal: Option<HalInstance<hal::api::Metal>>, + #[cfg(feature = "dx12")] + pub dx12: Option<HalInstance<hal::api::Dx12>>, + #[cfg(feature = "dx11")] + pub dx11: Option<HalInstance<hal::api::Dx11>>, + #[cfg(feature = "gles")] + pub gl: Option<HalInstance<hal::api::Gles>>, +} + +impl Instance { + pub fn new(name: &str, backends: Backends) -> Self { + fn init<A: HalApi>(_: A, mask: Backends) -> Option<A::Instance> { + if mask.contains(A::VARIANT.into()) { + let mut flags = hal::InstanceFlags::empty(); + if cfg!(debug_assertions) { + flags |= hal::InstanceFlags::VALIDATION; + flags |= hal::InstanceFlags::DEBUG; + } + let hal_desc = hal::InstanceDescriptor { + name: "wgpu", + flags, + }; + unsafe { hal::Instance::init(&hal_desc).ok() } + } else { + None + } + } + + Self { + name: name.to_string(), + #[cfg(feature = "vulkan")] + vulkan: init(hal::api::Vulkan, backends), + #[cfg(feature = "metal")] + metal: init(hal::api::Metal, backends), + #[cfg(feature = "dx12")] + dx12: init(hal::api::Dx12, backends), + #[cfg(feature = "dx11")] + dx11: init(hal::api::Dx11, backends), + #[cfg(feature = "gles")] + gl: init(hal::api::Gles, backends), + } + } + + pub(crate) fn destroy_surface(&self, surface: Surface) { + fn destroy<A: HalApi>( + _: A, + instance: &Option<A::Instance>, + surface: Option<HalSurface<A>>, + ) { + unsafe { + if let Some(suf) = 
surface { + instance.as_ref().unwrap().destroy_surface(suf.raw); + } + } + } + #[cfg(feature = "vulkan")] + destroy(hal::api::Vulkan, &self.vulkan, surface.vulkan); + #[cfg(feature = "metal")] + destroy(hal::api::Metal, &self.metal, surface.metal); + #[cfg(feature = "dx12")] + destroy(hal::api::Dx12, &self.dx12, surface.dx12); + #[cfg(feature = "dx11")] + destroy(hal::api::Dx11, &self.dx11, surface.dx11); + #[cfg(feature = "gles")] + destroy(hal::api::Gles, &self.gl, surface.gl); + } +} + +pub struct Surface { + pub(crate) presentation: Option<Presentation>, + #[cfg(feature = "vulkan")] + pub vulkan: Option<HalSurface<hal::api::Vulkan>>, + #[cfg(feature = "metal")] + pub metal: Option<HalSurface<hal::api::Metal>>, + #[cfg(feature = "dx12")] + pub dx12: Option<HalSurface<hal::api::Dx12>>, + #[cfg(feature = "dx11")] + pub dx11: Option<HalSurface<hal::api::Dx11>>, + #[cfg(feature = "gles")] + pub gl: Option<HalSurface<hal::api::Gles>>, +} + +impl crate::hub::Resource for Surface { + const TYPE: &'static str = "Surface"; + + fn life_guard(&self) -> &LifeGuard { + unreachable!() + } + + fn label(&self) -> &str { + "<Surface>" + } +} + +impl Surface { + pub fn get_capabilities<A: HalApi>( + &self, + adapter: &Adapter<A>, + ) -> Result<hal::SurfaceCapabilities, GetSurfaceSupportError> { + let suf = A::get_surface(self).ok_or(GetSurfaceSupportError::Unsupported)?; + profiling::scope!("surface_capabilities"); + let caps = unsafe { + adapter + .raw + .adapter + .surface_capabilities(&suf.raw) + .ok_or(GetSurfaceSupportError::Unsupported)? + }; + + Ok(caps) + } +} + +pub struct Adapter<A: hal::Api> { + pub(crate) raw: hal::ExposedAdapter<A>, + life_guard: LifeGuard, +} + +impl<A: HalApi> Adapter<A> { + fn new(mut raw: hal::ExposedAdapter<A>) -> Self { + // WebGPU requires this offset alignment as lower bound on all adapters. 
+ const MIN_BUFFER_OFFSET_ALIGNMENT_LOWER_BOUND: u32 = 32; + + let limits = &mut raw.capabilities.limits; + + limits.min_uniform_buffer_offset_alignment = limits + .min_uniform_buffer_offset_alignment + .max(MIN_BUFFER_OFFSET_ALIGNMENT_LOWER_BOUND); + limits.min_storage_buffer_offset_alignment = limits + .min_storage_buffer_offset_alignment + .max(MIN_BUFFER_OFFSET_ALIGNMENT_LOWER_BOUND); + + Self { + raw, + life_guard: LifeGuard::new("<Adapter>"), + } + } + + pub fn is_surface_supported(&self, surface: &Surface) -> bool { + let suf = A::get_surface(surface); + + // If get_surface returns None, then the API does not advertise support for the surface. + // + // This could occur if the user is running their app on Wayland but Vulkan does not support + // VK_KHR_wayland_surface. + match suf { + Some(suf) => unsafe { self.raw.adapter.surface_capabilities(&suf.raw) }.is_some(), + None => false, + } + } + + pub(crate) fn get_texture_format_features( + &self, + format: wgt::TextureFormat, + ) -> wgt::TextureFormatFeatures { + use hal::TextureFormatCapabilities as Tfc; + + let caps = unsafe { self.raw.adapter.texture_format_capabilities(format) }; + let mut allowed_usages = wgt::TextureUsages::empty(); + + allowed_usages.set(wgt::TextureUsages::COPY_SRC, caps.contains(Tfc::COPY_SRC)); + allowed_usages.set(wgt::TextureUsages::COPY_DST, caps.contains(Tfc::COPY_DST)); + allowed_usages.set( + wgt::TextureUsages::TEXTURE_BINDING, + caps.contains(Tfc::SAMPLED), + ); + allowed_usages.set( + wgt::TextureUsages::STORAGE_BINDING, + caps.contains(Tfc::STORAGE), + ); + allowed_usages.set( + wgt::TextureUsages::RENDER_ATTACHMENT, + caps.intersects(Tfc::COLOR_ATTACHMENT | Tfc::DEPTH_STENCIL_ATTACHMENT), + ); + + let mut flags = wgt::TextureFormatFeatureFlags::empty(); + flags.set( + wgt::TextureFormatFeatureFlags::STORAGE_ATOMICS, + caps.contains(Tfc::STORAGE_ATOMIC), + ); + flags.set( + wgt::TextureFormatFeatureFlags::STORAGE_READ_WRITE, + caps.contains(Tfc::STORAGE_READ_WRITE), + ); + 
+ flags.set( + wgt::TextureFormatFeatureFlags::FILTERABLE, + caps.contains(Tfc::SAMPLED_LINEAR), + ); + + flags.set( + wgt::TextureFormatFeatureFlags::BLENDABLE, + caps.contains(Tfc::COLOR_ATTACHMENT_BLEND), + ); + + flags.set( + wgt::TextureFormatFeatureFlags::MULTISAMPLE_X2, + caps.contains(Tfc::MULTISAMPLE_X2), + ); + flags.set( + wgt::TextureFormatFeatureFlags::MULTISAMPLE_X4, + caps.contains(Tfc::MULTISAMPLE_X4), + ); + flags.set( + wgt::TextureFormatFeatureFlags::MULTISAMPLE_X8, + caps.contains(Tfc::MULTISAMPLE_X8), + ); + + flags.set( + wgt::TextureFormatFeatureFlags::MULTISAMPLE_RESOLVE, + caps.contains(Tfc::MULTISAMPLE_RESOLVE), + ); + + wgt::TextureFormatFeatures { + allowed_usages, + flags, + } + } + + fn create_device_from_hal( + &self, + self_id: AdapterId, + open: hal::OpenDevice<A>, + desc: &DeviceDescriptor, + trace_path: Option<&std::path::Path>, + ) -> Result<Device<A>, RequestDeviceError> { + let caps = &self.raw.capabilities; + Device::new( + open, + Stored { + value: Valid(self_id), + ref_count: self.life_guard.add_ref(), + }, + caps.alignments.clone(), + caps.downlevel.clone(), + desc, + trace_path, + ) + .or(Err(RequestDeviceError::OutOfMemory)) + } + + fn create_device( + &self, + self_id: AdapterId, + desc: &DeviceDescriptor, + trace_path: Option<&std::path::Path>, + ) -> Result<Device<A>, RequestDeviceError> { + // Verify all features were exposed by the adapter + if !self.raw.features.contains(desc.features) { + return Err(RequestDeviceError::UnsupportedFeature( + desc.features - self.raw.features, + )); + } + + let caps = &self.raw.capabilities; + if wgt::Backends::PRIMARY.contains(wgt::Backends::from(A::VARIANT)) + && !caps.downlevel.is_webgpu_compliant() + { + let missing_flags = wgt::DownlevelFlags::compliant() - caps.downlevel.flags; + log::warn!( + "Missing downlevel flags: {:?}\n{}", + missing_flags, + DOWNLEVEL_WARNING_MESSAGE + ); + log::info!("{:#?}", caps.downlevel); + } + + // Verify feature preconditions + if desc + .features 
+ .contains(wgt::Features::MAPPABLE_PRIMARY_BUFFERS) + && self.raw.info.device_type == wgt::DeviceType::DiscreteGpu + { + log::warn!( + "Feature MAPPABLE_PRIMARY_BUFFERS enabled on a discrete gpu. \ + This is a massive performance footgun and likely not what you wanted" + ); + } + + if let Some(_) = desc.label { + //TODO + } + + if let Some(failed) = check_limits(&desc.limits, &caps.limits).pop() { + return Err(RequestDeviceError::LimitsExceeded(failed)); + } + + let open = unsafe { self.raw.adapter.open(desc.features, &desc.limits) }.map_err( + |err| match err { + hal::DeviceError::Lost => RequestDeviceError::DeviceLost, + hal::DeviceError::OutOfMemory => RequestDeviceError::OutOfMemory, + }, + )?; + + self.create_device_from_hal(self_id, open, desc, trace_path) + } +} + +impl<A: hal::Api> crate::hub::Resource for Adapter<A> { + const TYPE: &'static str = "Adapter"; + + fn life_guard(&self) -> &LifeGuard { + &self.life_guard + } +} + +#[derive(Clone, Debug, Error)] +pub enum IsSurfaceSupportedError { + #[error("invalid adapter")] + InvalidAdapter, + #[error("invalid surface")] + InvalidSurface, +} + +#[derive(Clone, Debug, Error)] +pub enum GetSurfaceSupportError { + #[error("invalid adapter")] + InvalidAdapter, + #[error("invalid surface")] + InvalidSurface, + #[error("surface is not supported by the adapter")] + Unsupported, +} + +#[derive(Clone, Debug, Error)] +/// Error when requesting a device from the adaptor +pub enum RequestDeviceError { + #[error("parent adapter is invalid")] + InvalidAdapter, + #[error("connection to device was lost during initialization")] + DeviceLost, + #[error("device initialization failed due to implementation specific errors")] + Internal, + #[error(transparent)] + LimitsExceeded(#[from] FailedLimit), + #[error("device has no queue supporting graphics")] + NoGraphicsQueue, + #[error("not enough memory left")] + OutOfMemory, + #[error("unsupported features were requested: {0:?}")] + UnsupportedFeature(wgt::Features), +} + +pub enum 
AdapterInputs<'a, I> { + IdSet(&'a [I], fn(&I) -> Backend), + Mask(Backends, fn(Backend) -> I), +} + +impl<I: Clone> AdapterInputs<'_, I> { + fn find(&self, b: Backend) -> Option<I> { + match *self { + Self::IdSet(ids, ref fun) => ids.iter().find(|id| fun(id) == b).cloned(), + Self::Mask(bits, ref fun) => { + if bits.contains(b.into()) { + Some(fun(b)) + } else { + None + } + } + } + } +} + +#[derive(Clone, Debug, Error)] +#[error("adapter is invalid")] +pub struct InvalidAdapter; + +#[derive(Clone, Debug, Error)] +pub enum RequestAdapterError { + #[error("no suitable adapter found")] + NotFound, + #[error("surface {0:?} is invalid")] + InvalidSurface(SurfaceId), +} + +impl<G: GlobalIdentityHandlerFactory> Global<G> { + #[cfg(feature = "raw-window-handle")] + pub fn instance_create_surface( + &self, + display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + id_in: Input<G, SurfaceId>, + ) -> SurfaceId { + profiling::scope!("Instance::create_surface"); + + fn init<A: hal::Api>( + inst: &Option<A::Instance>, + display_handle: raw_window_handle::RawDisplayHandle, + window_handle: raw_window_handle::RawWindowHandle, + ) -> Option<HalSurface<A>> { + inst.as_ref().and_then(|inst| unsafe { + match inst.create_surface(display_handle, window_handle) { + Ok(raw) => Some(HalSurface { + raw, + //acquired_texture: None, + }), + Err(e) => { + log::warn!("Error: {:?}", e); + None + } + } + }) + } + + let surface = Surface { + presentation: None, + #[cfg(feature = "vulkan")] + vulkan: init::<hal::api::Vulkan>(&self.instance.vulkan, display_handle, window_handle), + #[cfg(feature = "metal")] + metal: init::<hal::api::Metal>(&self.instance.metal, display_handle, window_handle), + #[cfg(feature = "dx12")] + dx12: init::<hal::api::Dx12>(&self.instance.dx12, display_handle, window_handle), + #[cfg(feature = "dx11")] + dx11: init::<hal::api::Dx11>(&self.instance.dx11, display_handle, window_handle), + #[cfg(feature = "gles")] + gl: 
init::<hal::api::Gles>(&self.instance.gl, display_handle, window_handle), + }; + + let mut token = Token::root(); + let id = self.surfaces.prepare(id_in).assign(surface, &mut token); + id.0 + } + + #[cfg(feature = "metal")] + pub fn instance_create_surface_metal( + &self, + layer: *mut std::ffi::c_void, + id_in: Input<G, SurfaceId>, + ) -> SurfaceId { + profiling::scope!("Instance::create_surface_metal"); + + let surface = Surface { + presentation: None, + metal: self.instance.metal.as_ref().map(|inst| HalSurface { + raw: { + // we don't want to link to metal-rs for this + #[allow(clippy::transmute_ptr_to_ref)] + inst.create_surface_from_layer(unsafe { std::mem::transmute(layer) }) + }, + //acquired_texture: None, + }), + #[cfg(feature = "vulkan")] + vulkan: None, + #[cfg(feature = "gles")] + gl: None, + }; + + let mut token = Token::root(); + let id = self.surfaces.prepare(id_in).assign(surface, &mut token); + id.0 + } + + #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] + pub fn create_surface_webgl_canvas( + &self, + canvas: &web_sys::HtmlCanvasElement, + id_in: Input<G, SurfaceId>, + ) -> Result<SurfaceId, hal::InstanceError> { + profiling::scope!("Instance::create_surface_webgl_canvas"); + + let surface = Surface { + presentation: None, + gl: self + .instance + .gl + .as_ref() + .map(|inst| { + Ok(HalSurface { + raw: inst.create_surface_from_canvas(canvas)?, + }) + }) + .transpose()?, + }; + + let mut token = Token::root(); + let id = self.surfaces.prepare(id_in).assign(surface, &mut token); + Ok(id.0) + } + + #[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))] + pub fn create_surface_webgl_offscreen_canvas( + &self, + canvas: &web_sys::OffscreenCanvas, + id_in: Input<G, SurfaceId>, + ) -> Result<SurfaceId, hal::InstanceError> { + profiling::scope!("Instance::create_surface_webgl_offscreen_canvas"); + + let surface = Surface { + presentation: None, + gl: self + .instance + .gl + .as_ref() + .map(|inst| { + Ok(HalSurface { + 
raw: inst.create_surface_from_offscreen_canvas(canvas)?, + }) + }) + .transpose()?, + }; + + let mut token = Token::root(); + let id = self.surfaces.prepare(id_in).assign(surface, &mut token); + Ok(id.0) + } + + #[cfg(feature = "dx12")] + /// # Safety + /// + /// The visual must be valid and able to be used to make a swapchain with. + pub unsafe fn instance_create_surface_from_visual( + &self, + visual: *mut std::ffi::c_void, + id_in: Input<G, SurfaceId>, + ) -> SurfaceId { + profiling::scope!("Instance::instance_create_surface_from_visual"); + + let surface = Surface { + presentation: None, + #[cfg(feature = "vulkan")] + vulkan: None, + dx12: self.instance.dx12.as_ref().map(|inst| HalSurface { + raw: unsafe { inst.create_surface_from_visual(visual as _) }, + }), + dx11: None, + #[cfg(feature = "gles")] + gl: None, + }; + + let mut token = Token::root(); + let id = self.surfaces.prepare(id_in).assign(surface, &mut token); + id.0 + } + + pub fn surface_drop(&self, id: SurfaceId) { + profiling::scope!("Surface::drop"); + let mut token = Token::root(); + let (surface, _) = self.surfaces.unregister(id, &mut token); + self.instance.destroy_surface(surface.unwrap()); + } + + fn enumerate<A: HalApi>( + &self, + _: A, + instance: &Option<A::Instance>, + inputs: &AdapterInputs<Input<G, AdapterId>>, + list: &mut Vec<AdapterId>, + ) { + let inst = match *instance { + Some(ref inst) => inst, + None => return, + }; + let id_backend = match inputs.find(A::VARIANT) { + Some(id) => id, + None => return, + }; + + profiling::scope!("enumerating", &*format!("{:?}", A::VARIANT)); + let hub = HalApi::hub(self); + let mut token = Token::root(); + + let hal_adapters = unsafe { inst.enumerate_adapters() }; + for raw in hal_adapters { + let adapter = Adapter::new(raw); + log::info!("Adapter {:?} {:?}", A::VARIANT, adapter.raw.info); + let id = hub + .adapters + .prepare(id_backend.clone()) + .assign(adapter, &mut token); + list.push(id.0); + } + } + + pub fn enumerate_adapters(&self, 
inputs: AdapterInputs<Input<G, AdapterId>>) -> Vec<AdapterId> { + profiling::scope!("Instance::enumerate_adapters"); + + let mut adapters = Vec::new(); + + #[cfg(feature = "vulkan")] + self.enumerate( + hal::api::Vulkan, + &self.instance.vulkan, + &inputs, + &mut adapters, + ); + #[cfg(feature = "metal")] + self.enumerate( + hal::api::Metal, + &self.instance.metal, + &inputs, + &mut adapters, + ); + #[cfg(feature = "dx12")] + self.enumerate(hal::api::Dx12, &self.instance.dx12, &inputs, &mut adapters); + #[cfg(feature = "dx11")] + self.enumerate(hal::api::Dx11, &self.instance.dx11, &inputs, &mut adapters); + #[cfg(feature = "gles")] + self.enumerate(hal::api::Gles, &self.instance.gl, &inputs, &mut adapters); + + adapters + } + + fn select<A: HalApi>( + &self, + selected: &mut usize, + new_id: Option<Input<G, AdapterId>>, + mut list: Vec<hal::ExposedAdapter<A>>, + ) -> Option<AdapterId> { + match selected.checked_sub(list.len()) { + Some(left) => { + *selected = left; + None + } + None => { + let mut token = Token::root(); + let adapter = Adapter::new(list.swap_remove(*selected)); + log::info!("Adapter {:?} {:?}", A::VARIANT, adapter.raw.info); + let id = HalApi::hub(self) + .adapters + .prepare(new_id.unwrap()) + .assign(adapter, &mut token); + Some(id.0) + } + } + } + + pub fn request_adapter( + &self, + desc: &RequestAdapterOptions, + inputs: AdapterInputs<Input<G, AdapterId>>, + ) -> Result<AdapterId, RequestAdapterError> { + profiling::scope!("Instance::pick_adapter"); + + fn gather<A: HalApi, I: Clone>( + _: A, + instance: Option<&A::Instance>, + inputs: &AdapterInputs<I>, + compatible_surface: Option<&Surface>, + force_software: bool, + device_types: &mut Vec<wgt::DeviceType>, + ) -> (Option<I>, Vec<hal::ExposedAdapter<A>>) { + let id = inputs.find(A::VARIANT); + match instance { + Some(inst) if id.is_some() => { + let mut adapters = unsafe { inst.enumerate_adapters() }; + if force_software { + adapters.retain(|exposed| exposed.info.device_type == 
wgt::DeviceType::Cpu); + } + if let Some(surface) = compatible_surface { + let surface = &A::get_surface(surface); + adapters.retain(|exposed| unsafe { + // If the surface does not exist for this backend, + // then the surface is not supported. + surface.is_some() + && exposed + .adapter + .surface_capabilities(&surface.unwrap().raw) + .is_some() + }); + } + device_types.extend(adapters.iter().map(|ad| ad.info.device_type)); + (id, adapters) + } + _ => (id, Vec::new()), + } + } + + let mut token = Token::root(); + let (surface_guard, _) = self.surfaces.read(&mut token); + let compatible_surface = desc + .compatible_surface + .map(|id| { + surface_guard + .get(id) + .map_err(|_| RequestAdapterError::InvalidSurface(id)) + }) + .transpose()?; + let mut device_types = Vec::new(); + + #[cfg(feature = "vulkan")] + let (id_vulkan, adapters_vk) = gather( + hal::api::Vulkan, + self.instance.vulkan.as_ref(), + &inputs, + compatible_surface, + desc.force_fallback_adapter, + &mut device_types, + ); + #[cfg(feature = "metal")] + let (id_metal, adapters_metal) = gather( + hal::api::Metal, + self.instance.metal.as_ref(), + &inputs, + compatible_surface, + desc.force_fallback_adapter, + &mut device_types, + ); + #[cfg(feature = "dx12")] + let (id_dx12, adapters_dx12) = gather( + hal::api::Dx12, + self.instance.dx12.as_ref(), + &inputs, + compatible_surface, + desc.force_fallback_adapter, + &mut device_types, + ); + #[cfg(feature = "dx11")] + let (id_dx11, adapters_dx11) = gather( + hal::api::Dx11, + self.instance.dx11.as_ref(), + &inputs, + compatible_surface, + desc.force_fallback_adapter, + &mut device_types, + ); + #[cfg(feature = "gles")] + let (id_gl, adapters_gl) = gather( + hal::api::Gles, + self.instance.gl.as_ref(), + &inputs, + compatible_surface, + desc.force_fallback_adapter, + &mut device_types, + ); + + // need to free the token to be used by `select` + drop(surface_guard); + drop(token); + if device_types.is_empty() { + return Err(RequestAdapterError::NotFound); + } 
+ + let (mut integrated, mut discrete, mut virt, mut cpu, mut other) = + (None, None, None, None, None); + + for (i, ty) in device_types.into_iter().enumerate() { + match ty { + wgt::DeviceType::IntegratedGpu => { + integrated = integrated.or(Some(i)); + } + wgt::DeviceType::DiscreteGpu => { + discrete = discrete.or(Some(i)); + } + wgt::DeviceType::VirtualGpu => { + virt = virt.or(Some(i)); + } + wgt::DeviceType::Cpu => { + cpu = cpu.or(Some(i)); + } + wgt::DeviceType::Other => { + other = other.or(Some(i)); + } + } + } + + let preferred_gpu = match desc.power_preference { + // Since devices of type "Other" might really be "Unknown" and come + // from APIs like OpenGL that don't specify device type, Prefer more + // Specific types over Other. + // + // This means that backends which do provide accurate device types + // will be preferred if their device type indicates an actual + // hardware GPU (integrated or discrete). + PowerPreference::LowPower => integrated.or(discrete).or(other).or(virt).or(cpu), + PowerPreference::HighPerformance => discrete.or(integrated).or(other).or(virt).or(cpu), + }; + + let mut selected = preferred_gpu.unwrap_or(0); + #[cfg(feature = "vulkan")] + if let Some(id) = self.select(&mut selected, id_vulkan, adapters_vk) { + return Ok(id); + } + #[cfg(feature = "metal")] + if let Some(id) = self.select(&mut selected, id_metal, adapters_metal) { + return Ok(id); + } + #[cfg(feature = "dx12")] + if let Some(id) = self.select(&mut selected, id_dx12, adapters_dx12) { + return Ok(id); + } + #[cfg(feature = "dx11")] + if let Some(id) = self.select(&mut selected, id_dx11, adapters_dx11) { + return Ok(id); + } + #[cfg(feature = "gles")] + if let Some(id) = self.select(&mut selected, id_gl, adapters_gl) { + return Ok(id); + } + let _ = selected; + + log::warn!("Some adapters are present, but enumerating them failed!"); + Err(RequestAdapterError::NotFound) + } + + /// # Safety + /// + /// `hal_adapter` must be created from this global internal instance 
handle. + pub unsafe fn create_adapter_from_hal<A: HalApi>( + &self, + hal_adapter: hal::ExposedAdapter<A>, + input: Input<G, AdapterId>, + ) -> AdapterId { + profiling::scope!("Instance::create_adapter_from_hal"); + + let mut token = Token::root(); + let fid = A::hub(self).adapters.prepare(input); + + match A::VARIANT { + #[cfg(feature = "vulkan")] + Backend::Vulkan => fid.assign(Adapter::new(hal_adapter), &mut token).0, + #[cfg(feature = "metal")] + Backend::Metal => fid.assign(Adapter::new(hal_adapter), &mut token).0, + #[cfg(feature = "dx12")] + Backend::Dx12 => fid.assign(Adapter::new(hal_adapter), &mut token).0, + #[cfg(feature = "dx11")] + Backend::Dx11 => fid.assign(Adapter::new(hal_adapter), &mut token).0, + #[cfg(feature = "gles")] + Backend::Gl => fid.assign(Adapter::new(hal_adapter), &mut token).0, + _ => unreachable!(), + } + } + + pub fn adapter_get_info<A: HalApi>( + &self, + adapter_id: AdapterId, + ) -> Result<wgt::AdapterInfo, InvalidAdapter> { + let hub = A::hub(self); + let mut token = Token::root(); + let (adapter_guard, _) = hub.adapters.read(&mut token); + adapter_guard + .get(adapter_id) + .map(|adapter| adapter.raw.info.clone()) + .map_err(|_| InvalidAdapter) + } + + pub fn adapter_get_texture_format_features<A: HalApi>( + &self, + adapter_id: AdapterId, + format: wgt::TextureFormat, + ) -> Result<wgt::TextureFormatFeatures, InvalidAdapter> { + let hub = A::hub(self); + let mut token = Token::root(); + let (adapter_guard, _) = hub.adapters.read(&mut token); + adapter_guard + .get(adapter_id) + .map(|adapter| adapter.get_texture_format_features(format)) + .map_err(|_| InvalidAdapter) + } + + pub fn adapter_features<A: HalApi>( + &self, + adapter_id: AdapterId, + ) -> Result<wgt::Features, InvalidAdapter> { + let hub = A::hub(self); + let mut token = Token::root(); + let (adapter_guard, _) = hub.adapters.read(&mut token); + adapter_guard + .get(adapter_id) + .map(|adapter| adapter.raw.features) + .map_err(|_| InvalidAdapter) + } + + pub fn 
adapter_limits<A: HalApi>( + &self, + adapter_id: AdapterId, + ) -> Result<wgt::Limits, InvalidAdapter> { + let hub = A::hub(self); + let mut token = Token::root(); + let (adapter_guard, _) = hub.adapters.read(&mut token); + adapter_guard + .get(adapter_id) + .map(|adapter| adapter.raw.capabilities.limits.clone()) + .map_err(|_| InvalidAdapter) + } + + pub fn adapter_downlevel_capabilities<A: HalApi>( + &self, + adapter_id: AdapterId, + ) -> Result<wgt::DownlevelCapabilities, InvalidAdapter> { + let hub = A::hub(self); + let mut token = Token::root(); + let (adapter_guard, _) = hub.adapters.read(&mut token); + adapter_guard + .get(adapter_id) + .map(|adapter| adapter.raw.capabilities.downlevel.clone()) + .map_err(|_| InvalidAdapter) + } + + pub fn adapter_get_presentation_timestamp<A: HalApi>( + &self, + adapter_id: AdapterId, + ) -> Result<wgt::PresentationTimestamp, InvalidAdapter> { + let hub = A::hub(self); + let mut token = Token::root(); + let (adapter_guard, _) = hub.adapters.read(&mut token); + let adapter = adapter_guard.get(adapter_id).map_err(|_| InvalidAdapter)?; + + Ok(unsafe { adapter.raw.adapter.get_presentation_timestamp() }) + } + + pub fn adapter_drop<A: HalApi>(&self, adapter_id: AdapterId) { + profiling::scope!("Adapter::drop"); + + let hub = A::hub(self); + let mut token = Token::root(); + let (mut adapter_guard, _) = hub.adapters.write(&mut token); + + let free = match adapter_guard.get_mut(adapter_id) { + Ok(adapter) => adapter.life_guard.ref_count.take().unwrap().load() == 1, + Err(_) => true, + }; + if free { + hub.adapters + .unregister_locked(adapter_id, &mut *adapter_guard); + } + } +} + +impl<G: GlobalIdentityHandlerFactory> Global<G> { + pub fn adapter_request_device<A: HalApi>( + &self, + adapter_id: AdapterId, + desc: &DeviceDescriptor, + trace_path: Option<&std::path::Path>, + id_in: Input<G, DeviceId>, + ) -> (DeviceId, Option<RequestDeviceError>) { + profiling::scope!("Adapter::request_device"); + + let hub = A::hub(self); + let 
mut token = Token::root(); + let fid = hub.devices.prepare(id_in); + + let error = loop { + let (adapter_guard, mut token) = hub.adapters.read(&mut token); + let adapter = match adapter_guard.get(adapter_id) { + Ok(adapter) => adapter, + Err(_) => break RequestDeviceError::InvalidAdapter, + }; + let device = match adapter.create_device(adapter_id, desc, trace_path) { + Ok(device) => device, + Err(e) => break e, + }; + let id = fid.assign(device, &mut token); + return (id.0, None); + }; + + let id = fid.assign_error(desc.label.borrow_or_default(), &mut token); + (id, Some(error)) + } + + /// # Safety + /// + /// - `hal_device` must be created from `adapter_id` or its internal handle. + /// - `desc` must be a subset of `hal_device` features and limits. + pub unsafe fn create_device_from_hal<A: HalApi>( + &self, + adapter_id: AdapterId, + hal_device: hal::OpenDevice<A>, + desc: &DeviceDescriptor, + trace_path: Option<&std::path::Path>, + id_in: Input<G, DeviceId>, + ) -> (DeviceId, Option<RequestDeviceError>) { + profiling::scope!("Adapter::create_device_from_hal"); + + let hub = A::hub(self); + let mut token = Token::root(); + let fid = hub.devices.prepare(id_in); + + let error = loop { + let (adapter_guard, mut token) = hub.adapters.read(&mut token); + let adapter = match adapter_guard.get(adapter_id) { + Ok(adapter) => adapter, + Err(_) => break RequestDeviceError::InvalidAdapter, + }; + let device = + match adapter.create_device_from_hal(adapter_id, hal_device, desc, trace_path) { + Ok(device) => device, + Err(e) => break e, + }; + let id = fid.assign(device, &mut token); + return (id.0, None); + }; + + let id = fid.assign_error(desc.label.borrow_or_default(), &mut token); + (id, Some(error)) + } +} + +/// Generates a set of backends from a comma separated list of case-insensitive backend names. +/// +/// Whitespace is stripped, so both 'gl, dx12' and 'gl,dx12' are valid. +/// +/// Always returns WEBGPU on wasm over webgpu. 
+/// +/// Names: +/// - vulkan = "vulkan" or "vk" +/// - dx12 = "dx12" or "d3d12" +/// - dx11 = "dx11" or "d3d11" +/// - metal = "metal" or "mtl" +/// - gles = "opengl" or "gles" or "gl" +/// - webgpu = "webgpu" +pub fn parse_backends_from_comma_list(string: &str) -> Backends { + let mut backends = Backends::empty(); + for backend in string.to_lowercase().split(',') { + backends |= match backend.trim() { + "vulkan" | "vk" => Backends::VULKAN, + "dx12" | "d3d12" => Backends::DX12, + "dx11" | "d3d11" => Backends::DX11, + "metal" | "mtl" => Backends::METAL, + "opengl" | "gles" | "gl" => Backends::GL, + "webgpu" => Backends::BROWSER_WEBGPU, + b => { + log::warn!("unknown backend string '{}'", b); + continue; + } + } + } + + if backends.is_empty() { + log::warn!("no valid backend strings found!"); + } + + backends +} diff --git a/third_party/rust/wgpu-core/src/lib.rs b/third_party/rust/wgpu-core/src/lib.rs new file mode 100644 index 0000000000..ff9f3d4b65 --- /dev/null +++ b/third_party/rust/wgpu-core/src/lib.rs @@ -0,0 +1,430 @@ +/*! This library safely implements WebGPU on native platforms. + * It is designed for integration into browsers, as well as wrapping + * into other language-specific user-friendly libraries. + */ + +#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] +#![allow( + // It is much clearer to assert negative conditions with eq! false + clippy::bool_assert_comparison, + // We use loops for getting early-out of scope without closures. + clippy::never_loop, + // We don't use syntax sugar where it's not necessary. + clippy::match_like_matches_macro, + // Redundant matching is more explicit. + clippy::redundant_pattern_matching, + // Explicit lifetimes are often easier to reason about. + clippy::needless_lifetimes, + // No need for defaults in the internal types. + clippy::new_without_default, + // Needless updates are more scaleable, easier to play with features. 
/// Optional, possibly borrowed, label text carried by descriptors.
pub type Label<'a> = Option<Cow<'a, str>>;

/// Convenience accessors for reading a [`Label`] as a plain string slice.
trait LabelHelpers<'a> {
    /// Returns the label text, or `None` when no label is set.
    fn borrow_option(&'a self) -> Option<&'a str>;
    /// Returns the label text, or the empty string when no label is set.
    fn borrow_or_default(&'a self) -> &'a str;
}

impl<'a> LabelHelpers<'a> for Label<'a> {
    fn borrow_option(&'a self) -> Option<&'a str> {
        // `Cow<str>` derefs to `str`, so this borrows without allocating.
        self.as_deref()
    }

    fn borrow_or_default(&'a self) -> &'a str {
        match self.borrow_option() {
            Some(text) => text,
            None => "",
        }
    }
}
/// Reference count object that is 1:1 with each reference.
///
/// All the clones of a given `RefCount` point to the same
/// heap-allocated atomic reference count. When the count drops to
/// zero, only the count is freed. No other automatic cleanup takes
/// place; this is just a reference count, not a smart pointer.
///
/// `RefCount` values are created only by [`LifeGuard::new`] and by
/// `Clone`, so every `RefCount` is implicitly tied to some
/// [`LifeGuard`].
#[derive(Debug)]
struct RefCount(ptr::NonNull<AtomicUsize>);

// SAFETY: the only state shared between handles is the heap-allocated
// `AtomicUsize`. It is touched exclusively through atomic operations and is
// freed only by the handle whose decrement observes the count reaching zero.
unsafe impl Send for RefCount {}
unsafe impl Sync for RefCount {}

impl RefCount {
    /// Upper bound on the count observed by `clone` before incrementing;
    /// exceeding it is treated as a bug and panics.
    const MAX: usize = 1 << 24;

    /// Construct a new `RefCount`, with an initial count of 1.
    fn new() -> RefCount {
        let counter = Box::new(AtomicUsize::new(1));
        // Leaking the box hands ownership of the allocation to the set of
        // `RefCount` handles; the last one to drop reclaims it.
        Self(ptr::NonNull::from(Box::leak(counter)))
    }

    /// Shared view of the heap-allocated counter.
    fn counter(&self) -> &AtomicUsize {
        // SAFETY: the pointer comes from a leaked `Box` in `new` and is freed
        // only when the last handle is dropped. `self` is a live handle, so
        // the allocation is still valid for the duration of this borrow.
        unsafe { self.0.as_ref() }
    }

    /// Returns the current number of live handles sharing this counter.
    fn load(&self) -> usize {
        self.counter().load(Ordering::Acquire)
    }
}

impl Clone for RefCount {
    /// Creates another handle to the same counter, incrementing it.
    ///
    /// # Panics
    ///
    /// Panics if the count before the increment had already reached
    /// `RefCount::MAX`.
    fn clone(&self) -> Self {
        let old_size = self.counter().fetch_add(1, Ordering::AcqRel);
        assert!(old_size < Self::MAX);
        Self(self.0)
    }
}

impl Drop for RefCount {
    /// Decrements the counter and frees it if this was the last handle.
    fn drop(&mut self) {
        if self.counter().fetch_sub(1, Ordering::AcqRel) == 1 {
            // SAFETY: the previous value was 1, so this was the last handle.
            // Nobody else can reach the allocation, and it was created by
            // `Box` in `new`, so rebuilding the box to free it is sound.
            drop(unsafe { Box::from_raw(self.0.as_ptr()) });
        }
    }
}

/// Reference count object that tracks multiple references.
/// Unlike `RefCount`, it's manually inc()/dec() called.
#[derive(Debug)]
struct MultiRefCount(AtomicUsize);

impl MultiRefCount {
    /// Creates a counter with an initial count of 1.
    fn new() -> Self {
        Self(AtomicUsize::new(1))
    }

    /// Adds one reference.
    fn inc(&self) {
        self.0.fetch_add(1, Ordering::AcqRel);
    }

    /// Removes one reference; returns `true` if it was the last one.
    fn dec_and_check_empty(&self) -> bool {
        self.0.fetch_sub(1, Ordering::AcqRel) == 1
    }
}
+/// +/// - It may be used by commands sent to the GPU that have not yet +/// finished execution. +/// +/// [`Device`]: device::Device +/// [`Buffer`]: resource::Buffer +#[derive(Debug)] +pub struct LifeGuard { + /// `RefCount` for the user's reference to this resource. + /// + /// When the user first creates a `wgpu-core` resource, this `RefCount` is + /// created along with the resource's `LifeGuard`. When the user drops the + /// resource, we swap this out for `None`. Note that the resource may + /// still be held alive by other resources. + /// + /// Any `Stored<T>` value holds a clone of this `RefCount` along with the id + /// of a `T` resource. + ref_count: Option<RefCount>, + + /// The index of the last queue submission in which the resource + /// was used. + /// + /// Each queue submission is fenced and assigned an index number + /// sequentially. Thus, when a queue submission completes, we know any + /// resources used in that submission and any lower-numbered submissions are + /// no longer in use by the GPU. + submission_index: AtomicUsize, + + /// The `label` from the descriptor used to create the resource. + #[cfg(debug_assertions)] + pub(crate) label: String, +} + +impl LifeGuard { + #[allow(unused_variables)] + fn new(label: &str) -> Self { + Self { + ref_count: Some(RefCount::new()), + submission_index: AtomicUsize::new(0), + #[cfg(debug_assertions)] + label: label.to_string(), + } + } + + fn add_ref(&self) -> RefCount { + self.ref_count.clone().unwrap() + } + + /// Record that this resource will be used by the queue submission with the + /// given index. + /// + /// Returns `true` if the resource is still held by the user. 
+ fn use_at(&self, submit_index: SubmissionIndex) -> bool { + self.submission_index + .store(submit_index as _, Ordering::Release); + self.ref_count.is_some() + } + + fn life_count(&self) -> SubmissionIndex { + self.submission_index.load(Ordering::Acquire) as _ + } +} + +#[derive(Clone, Debug)] +struct Stored<T> { + value: id::Valid<T>, + ref_count: RefCount, +} + +const DOWNLEVEL_WARNING_MESSAGE: &str = "The underlying API or device in use does not \ +support enough features to be a fully compliant implementation of WebGPU. A subset of the features can still be used. \ +If you are running this program on native and not in a browser and wish to limit the features you use to the supported subset, \ +call Adapter::downlevel_properties or Device::downlevel_properties to get a listing of the features the current \ +platform supports."; +const DOWNLEVEL_ERROR_MESSAGE: &str = "This is not an invalid use of WebGPU: the underlying API or device does not \ +support enough features to be a fully compliant implementation. A subset of the features can still be used. \ +If you are running this program on native and not in a browser and wish to work around this issue, call \ +Adapter::downlevel_properties or Device::downlevel_properties to get a listing of the features the current \ +platform supports."; + +// #[cfg] attributes in exported macros are interesting! +// +// The #[cfg] conditions in a macro's expansion are evaluated using the +// configuration options (features, target architecture and os, etc.) in force +// where the macro is *used*, not where it is *defined*. That is, if crate A +// defines a macro like this: +// +// #[macro_export] +// macro_rules! if_bleep { +// { } => { +// #[cfg(feature = "bleep")] +// bleep(); +// } +// } +// +// and then crate B uses it like this: +// +// fn f() { +// if_bleep! { } +// } +// +// then it is crate B's `"bleep"` feature, not crate A's, that determines +// whether the macro expands to a function call or an empty statement. 
/// Define an exported macro named `$public` that expands to an expression if
/// the feature `$feature` is enabled, or to a panic otherwise.
///
/// For a call like this (note the `if` keyword and the string-literal
/// feature name required by the matcher):
///
///     define_backend_caller! { name, hidden_name if "feature" }
///
/// define a macro `name`, used like this:
///
///     name!(expr)
///
/// that expands to `expr` if `feature` is enabled, or a panic otherwise.
///
/// Because of odd technical limitations on exporting macros expanded by other
/// macros, you must supply both a public-facing name for the macro and a
/// private name, which is never used outside this macro. For details:
/// <https://github.com/rust-lang/rust/pull/52234#issuecomment-976702997>
macro_rules! define_backend_caller {
    { $public:ident, $private:ident if $feature:literal } => {
        // Feature enabled: the generated macro is a transparent pass-through.
        #[cfg(feature = $feature )]
        #[macro_export]
        macro_rules! $private {
            ( $call:expr ) => ( $call )
        }

        // Feature disabled: the generated macro discards `$call` and panics
        // at run time, naming the missing feature.
        #[cfg(not(feature = $feature ))]
        #[macro_export]
        macro_rules! $private {
            ( $call:expr ) => (
                panic!("Identifier refers to disabled backend feature {:?}", $feature)
            )
        }

        // See note about rust-lang#52234 above.
        // The macro is defined under the private name and re-exported under
        // the public one.
        #[doc(hidden)] pub use $private as $public;
    }
}
For example, +// +// gfx_if_vulkan!(expr) +// +// expands to `expr` if the `"vulkan"` feature is enabled, or to a panic +// otherwise. +define_backend_caller! { gfx_if_vulkan, gfx_if_vulkan_hidden if "vulkan" } +define_backend_caller! { gfx_if_metal, gfx_if_metal_hidden if "metal" } +define_backend_caller! { gfx_if_dx12, gfx_if_dx12_hidden if "dx12" } +define_backend_caller! { gfx_if_dx11, gfx_if_dx11_hidden if "dx11" } +define_backend_caller! { gfx_if_gles, gfx_if_gles_hidden if "gles" } + +/// Dispatch on an [`Id`]'s backend to a backend-generic method. +/// +/// Uses of this macro have the form: +/// +/// ```ignore +/// +/// gfx_select!(id => value.method(args...)) +/// +/// ``` +/// +/// This expands to an expression that calls `value.method::<A>(args...)` for +/// the backend `A` selected by `id`. The expansion matches on `id.backend()`, +/// with an arm for each backend type in [`wgpu_types::Backend`] which calls the +/// specialization of `method` for the given backend. This allows resource +/// identifiers to select backends dynamically, even though many `wgpu_core` +/// methods are compiled and optimized for a specific back end. +/// +/// This macro is typically used to call methods on [`wgpu_core::hub::Global`], +/// many of which take a single `hal::Api` type parameter. For example, to +/// create a new buffer on the device indicated by `device_id`, one would say: +/// +/// ```ignore +/// gfx_select!(device_id => global.device_create_buffer(device_id, ...)) +/// ``` +/// +/// where the `device_create_buffer` method is defined like this: +/// +/// ```ignore +/// impl<...> Global<...> { +/// pub fn device_create_buffer<A: hal::Api>(&self, ...) -> ... +/// { ... } +/// } +/// ``` +/// +/// That `gfx_select!` call uses `device_id`'s backend to select the right +/// backend type `A` for a call to `Global::device_create_buffer<A>`. +/// +/// However, there's nothing about this macro that is specific to `hub::Global`. 
/// Dispatch on an [`Id`]'s backend to a backend-generic method.
///
/// Uses of this macro have the form:
///
/// ```ignore
///
///     gfx_select!(id => value.method(args...))
///
/// ```
///
/// This expands to an expression that calls `value.method::<A>(args...)` for
/// the backend `A` selected by `id`. The expansion matches on `id.backend()`,
/// with an arm for each backend type in [`wgpu_types::Backend`] which calls the
/// specialization of `method` for the given backend. This allows resource
/// identifiers to select backends dynamically, even though many `wgpu_core`
/// methods are compiled and optimized for a specific back end.
///
/// This macro is typically used to call methods on [`wgpu_core::hub::Global`],
/// many of which take a single `hal::Api` type parameter. For example, to
/// create a new buffer on the device indicated by `device_id`, one would say:
///
/// ```ignore
/// gfx_select!(device_id => global.device_create_buffer(device_id, ...))
/// ```
///
/// where the `device_create_buffer` method is defined like this:
///
/// ```ignore
/// impl<...> Global<...> {
///    pub fn device_create_buffer<A: hal::Api>(&self, ...) -> ...
///    { ... }
/// }
/// ```
///
/// That `gfx_select!` call uses `device_id`'s backend to select the right
/// backend type `A` for a call to `Global::device_create_buffer<A>`.
///
/// However, there's nothing about this macro that is specific to `hub::Global`.
/// For example, Firefox's embedding of `wgpu_core` defines its own types with
/// methods that take `hal::Api` type parameters. Firefox uses `gfx_select!` to
/// dynamically dispatch to the right specialization based on the resource's id.
///
/// # Panics
///
/// The expansion panics at run time if `id.backend()` is not one of the
/// backends listed in the arms below. Each arm goes through a `gfx_if_*!`
/// macro, which expands to a panic when that backend's feature is disabled.
///
/// [`wgpu_types::Backend`]: wgt::Backend
/// [`wgpu_core::hub::Global`]: crate::hub::Global
/// [`Id`]: id::Id
#[macro_export]
macro_rules! gfx_select {
    ($id:expr => $global:ident.$method:ident( $($param:expr),* )) => {
        match $id.backend() {
            wgt::Backend::Vulkan => $crate::gfx_if_vulkan!($global.$method::<$crate::api::Vulkan>( $($param),* )),
            wgt::Backend::Metal => $crate::gfx_if_metal!($global.$method::<$crate::api::Metal>( $($param),* )),
            wgt::Backend::Dx12 => $crate::gfx_if_dx12!($global.$method::<$crate::api::Dx12>( $($param),* )),
            wgt::Backend::Dx11 => $crate::gfx_if_dx11!($global.$method::<$crate::api::Dx11>( $($param),* )),
            // Uses `*` like the matcher and every other arm (this arm
            // previously used `+`, which did not match the `*` repetition
            // the parameters were captured with).
            wgt::Backend::Gl => $crate::gfx_if_gles!($global.$method::<$crate::api::Gles>( $($param),* )),
            other => panic!("Unexpected backend {:?}", other),
        }
    };
}
/// Returns the least common multiple of `a` and `b`.
///
/// Despite the "denominator" in the name, the value computed is
/// `a * b / gcd(a, b)`, i.e. the smallest number divisible by both inputs.
///
/// The division is performed before the multiplication so that the
/// intermediate value never exceeds the final result; inputs whose least
/// common multiple fits in `u32` therefore cannot overflow.
///
/// # Panics
///
/// Panics if either argument is zero (the underlying GCD computation takes a
/// remainder by the smaller argument).
#[inline]
pub(crate) fn get_lowest_common_denom(a: u32, b: u32) -> u32 {
    let gcd = if a >= b {
        get_greatest_common_divisor(a, b)
    } else {
        get_greatest_common_divisor(b, a)
    };
    // `gcd` divides `a` exactly, so `a / gcd * b == a * b / gcd` without the
    // oversized intermediate product.
    a / gcd * b
}

/// Returns the greatest common divisor of `a` and `b` using Euclid's
/// algorithm.
///
/// # Panics
///
/// Panics if `a < b`, or if `b` is zero.
#[inline]
pub(crate) fn get_greatest_common_divisor(mut a: u32, mut b: u32) -> u32 {
    assert!(a >= b);
    loop {
        let c = a % b;
        if c == 0 {
            return b;
        } else {
            a = b;
            b = c;
        }
    }
}

#[test]
fn test_lcd() {
    assert_eq!(get_lowest_common_denom(2, 2), 2);
    assert_eq!(get_lowest_common_denom(2, 3), 6);
    assert_eq!(get_lowest_common_denom(6, 4), 12);
    // The naive `a * b` would overflow `u32` here although the result fits.
    assert_eq!(get_lowest_common_denom(1 << 31, 2), 1 << 31);
}

#[test]
fn test_gcd() {
    assert_eq!(get_greatest_common_divisor(5, 1), 1);
    assert_eq!(get_greatest_common_divisor(4, 2), 2);
    assert_eq!(get_greatest_common_divisor(6, 4), 2);
    assert_eq!(get_greatest_common_divisor(7, 7), 7);
}
/// A shader module as tracked by `wgpu-core` for backend `A`.
#[derive(Debug)]
pub struct ShaderModule<A: hal::Api> {
    /// The backend's shader module object.
    pub(crate) raw: A::ShaderModule,
    /// Id (plus reference count) of the device this module is associated with.
    pub(crate) device_id: Stored<DeviceId>,
    /// Validation interface for the module, if one is available.
    pub(crate) interface: Option<validation::Interface>,
    /// Label text; only stored in builds with debug assertions enabled.
    #[cfg(debug_assertions)]
    pub(crate) label: String,
}

impl<A: hal::Api> Resource for ShaderModule<A> {
    const TYPE: &'static str = "ShaderModule";

    /// Shader modules carry no `LifeGuard` field, so this accessor must never
    /// be called.
    ///
    /// # Panics
    ///
    /// Always panics (`unreachable!`).
    fn life_guard(&self) -> &LifeGuard {
        unreachable!()
    }

    /// Returns the stored label in builds with debug assertions, and the
    /// empty string otherwise.
    fn label(&self) -> &str {
        // Exactly one of the two `return` statements survives `cfg` pruning.
        #[cfg(debug_assertions)]
        return &self.label;
        #[cfg(not(debug_assertions))]
        return "";
    }
}
self.label.as_deref().unwrap_or_default(); + let files = SimpleFile::new(label, &self.source); + let config = term::Config::default(); + let mut writer = term::termcolor::Ansi::new(Vec::new()); + + let diagnostic = Diagnostic::error().with_labels( + self.inner + .spans() + .map(|&(span, ref desc)| { + Label::primary((), span.to_range().unwrap()).with_message(desc.to_owned()) + }) + .collect(), + ); + + term::emit(&mut writer, &config, &files, &diagnostic).expect("cannot write error"); + + write!( + f, + "\nShader validation {}", + String::from_utf8_lossy(&writer.into_inner()) + ) + } +} +impl<E> Error for ShaderError<E> +where + ShaderError<E>: fmt::Display, + E: Error + 'static, +{ + fn source(&self) -> Option<&(dyn Error + 'static)> { + Some(&self.inner) + } +} + +//Note: `Clone` would require `WithSpan: Clone`. +#[derive(Debug, Error)] +pub enum CreateShaderModuleError { + #[cfg(feature = "wgsl")] + #[error(transparent)] + Parsing(#[from] ShaderError<naga::front::wgsl::ParseError>), + #[error("Failed to generate the backend-specific code")] + Generation, + #[error(transparent)] + Device(#[from] DeviceError), + #[error(transparent)] + Validation(#[from] ShaderError<naga::WithSpan<naga::valid::ValidationError>>), + #[error(transparent)] + MissingFeatures(#[from] MissingFeatures), + #[error( + "shader global {bind:?} uses a group index {group} that exceeds the max_bind_groups limit of {limit}." + )] + InvalidGroupIndex { + bind: naga::ResourceBinding, + group: u32, + limit: u32, + }, +} + +impl CreateShaderModuleError { + pub fn location(&self, source: &str) -> Option<naga::SourceLocation> { + match *self { + #[cfg(feature = "wgsl")] + CreateShaderModuleError::Parsing(ref err) => err.inner.location(source), + CreateShaderModuleError::Validation(ref err) => err.inner.location(source), + _ => None, + } + } +} + +/// Describes a programmable pipeline stage. 
+#[derive(Clone, Debug)] +#[cfg_attr(feature = "trace", derive(serde::Serialize))] +#[cfg_attr(feature = "replay", derive(serde::Deserialize))] +pub struct ProgrammableStageDescriptor<'a> { + /// The compiled shader module for this stage. + pub module: ShaderModuleId, + /// The name of the entry point in the compiled shader. There must be a function with this name + /// in the shader. + pub entry_point: Cow<'a, str>, +} + +/// Number of implicit bind groups derived at pipeline creation. +pub type ImplicitBindGroupCount = u8; + +#[derive(Clone, Debug, Error)] +pub enum ImplicitLayoutError { + #[error("missing IDs for deriving {0} bind groups")] + MissingIds(ImplicitBindGroupCount), + #[error("unable to reflect the shader {0:?} interface")] + ReflectionError(wgt::ShaderStages), + #[error(transparent)] + BindGroup(#[from] CreateBindGroupLayoutError), + #[error(transparent)] + Pipeline(#[from] CreatePipelineLayoutError), +} + +/// Describes a compute pipeline. +#[derive(Clone, Debug)] +#[cfg_attr(feature = "trace", derive(serde::Serialize))] +#[cfg_attr(feature = "replay", derive(serde::Deserialize))] +pub struct ComputePipelineDescriptor<'a> { + pub label: Label<'a>, + /// The layout of bind groups for this pipeline. + pub layout: Option<PipelineLayoutId>, + /// The compiled compute stage and its entry point. 
+ pub stage: ProgrammableStageDescriptor<'a>, +} + +#[derive(Clone, Debug, Error)] +pub enum CreateComputePipelineError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("pipeline layout is invalid")] + InvalidLayout, + #[error("unable to derive an implicit layout")] + Implicit(#[from] ImplicitLayoutError), + #[error("error matching shader requirements against the pipeline")] + Stage(#[from] validation::StageError), + #[error("Internal error: {0}")] + Internal(String), + #[error(transparent)] + MissingDownlevelFlags(#[from] MissingDownlevelFlags), +} + +#[derive(Debug)] +pub struct ComputePipeline<A: hal::Api> { + pub(crate) raw: A::ComputePipeline, + pub(crate) layout_id: Stored<PipelineLayoutId>, + pub(crate) device_id: Stored<DeviceId>, + pub(crate) late_sized_buffer_groups: ArrayVec<LateSizedBufferGroup, { hal::MAX_BIND_GROUPS }>, + pub(crate) life_guard: LifeGuard, +} + +impl<A: hal::Api> Resource for ComputePipeline<A> { + const TYPE: &'static str = "ComputePipeline"; + + fn life_guard(&self) -> &LifeGuard { + &self.life_guard + } +} + +/// Describes how the vertex buffer is interpreted. +#[derive(Clone, Debug)] +#[cfg_attr(feature = "trace", derive(serde::Serialize))] +#[cfg_attr(feature = "replay", derive(serde::Deserialize))] +#[cfg_attr(feature = "serde", serde(rename_all = "camelCase"))] +pub struct VertexBufferLayout<'a> { + /// The stride, in bytes, between elements of this buffer. + pub array_stride: wgt::BufferAddress, + /// How often this vertex buffer is "stepped" forward. + pub step_mode: wgt::VertexStepMode, + /// The list of attributes which comprise a single vertex. + pub attributes: Cow<'a, [wgt::VertexAttribute]>, +} + +/// Describes the vertex process in a render pipeline. +#[derive(Clone, Debug)] +#[cfg_attr(feature = "trace", derive(serde::Serialize))] +#[cfg_attr(feature = "replay", derive(serde::Deserialize))] +pub struct VertexState<'a> { + /// The compiled vertex stage and its entry point. 
+ pub stage: ProgrammableStageDescriptor<'a>, + /// The format of any vertex buffers used with this pipeline. + pub buffers: Cow<'a, [VertexBufferLayout<'a>]>, +} + +/// Describes fragment processing in a render pipeline. +#[derive(Clone, Debug)] +#[cfg_attr(feature = "trace", derive(serde::Serialize))] +#[cfg_attr(feature = "replay", derive(serde::Deserialize))] +pub struct FragmentState<'a> { + /// The compiled fragment stage and its entry point. + pub stage: ProgrammableStageDescriptor<'a>, + /// The effect of draw calls on the color aspect of the output target. + pub targets: Cow<'a, [Option<wgt::ColorTargetState>]>, +} + +/// Describes a render (graphics) pipeline. +#[derive(Clone, Debug)] +#[cfg_attr(feature = "trace", derive(serde::Serialize))] +#[cfg_attr(feature = "replay", derive(serde::Deserialize))] +pub struct RenderPipelineDescriptor<'a> { + pub label: Label<'a>, + /// The layout of bind groups for this pipeline. + pub layout: Option<PipelineLayoutId>, + /// The vertex processing state for this pipeline. + pub vertex: VertexState<'a>, + /// The properties of the pipeline at the primitive assembly and rasterization level. + #[cfg_attr(any(feature = "replay", feature = "trace"), serde(default))] + pub primitive: wgt::PrimitiveState, + /// The effect of draw calls on the depth and stencil aspects of the output target, if any. + #[cfg_attr(any(feature = "replay", feature = "trace"), serde(default))] + pub depth_stencil: Option<wgt::DepthStencilState>, + /// The multi-sampling properties of the pipeline. + #[cfg_attr(any(feature = "replay", feature = "trace"), serde(default))] + pub multisample: wgt::MultisampleState, + /// The fragment processing state for this pipeline. + pub fragment: Option<FragmentState<'a>>, + /// If the pipeline will be used with a multiview render pass, this indicates how many array + /// layers the attachments will have. 
+ pub multiview: Option<NonZeroU32>, +} + +#[derive(Clone, Debug, Error)] +pub enum ColorStateError { + #[error("format {0:?} is not renderable")] + FormatNotRenderable(wgt::TextureFormat), + #[error("format {0:?} is not blendable")] + FormatNotBlendable(wgt::TextureFormat), + #[error("format {0:?} does not have a color aspect")] + FormatNotColor(wgt::TextureFormat), + #[error("format {0:?} can't be multisampled")] + FormatNotMultisampled(wgt::TextureFormat), + #[error("output format {pipeline} is incompatible with the shader {shader}")] + IncompatibleFormat { + pipeline: validation::NumericType, + shader: validation::NumericType, + }, + #[error("blend factors for {0:?} must be `One`")] + InvalidMinMaxBlendFactors(wgt::BlendComponent), + #[error("invalid write mask {0:?}")] + InvalidWriteMask(wgt::ColorWrites), +} + +#[derive(Clone, Debug, Error)] +pub enum DepthStencilStateError { + #[error("format {0:?} is not renderable")] + FormatNotRenderable(wgt::TextureFormat), + #[error("format {0:?} does not have a depth aspect, but depth test/write is enabled")] + FormatNotDepth(wgt::TextureFormat), + #[error("format {0:?} does not have a stencil aspect, but stencil test/write is enabled")] + FormatNotStencil(wgt::TextureFormat), + #[error("format {0:?} can't be multisampled")] + FormatNotMultisampled(wgt::TextureFormat), +} + +#[derive(Clone, Debug, Error)] +pub enum CreateRenderPipelineError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("pipeline layout is invalid")] + InvalidLayout, + #[error("unable to derive an implicit layout")] + Implicit(#[from] ImplicitLayoutError), + #[error("color state [{0}] is invalid")] + ColorState(u8, #[source] ColorStateError), + #[error("depth/stencil state is invalid")] + DepthStencilState(#[from] DepthStencilStateError), + #[error("invalid sample count {0}")] + InvalidSampleCount(u32), + #[error("the number of color attachments {given} exceeds the limit {limit}")] + TooManyColorAttachments { given: u32, limit: u32 
}, + #[error("the number of vertex buffers {given} exceeds the limit {limit}")] + TooManyVertexBuffers { given: u32, limit: u32 }, + #[error("the total number of vertex attributes {given} exceeds the limit {limit}")] + TooManyVertexAttributes { given: u32, limit: u32 }, + #[error("vertex buffer {index} stride {given} exceeds the limit {limit}")] + VertexStrideTooLarge { index: u32, given: u32, limit: u32 }, + #[error("vertex buffer {index} stride {stride} does not respect `VERTEX_STRIDE_ALIGNMENT`")] + UnalignedVertexStride { + index: u32, + stride: wgt::BufferAddress, + }, + #[error("vertex attribute at location {location} has invalid offset {offset}")] + InvalidVertexAttributeOffset { + location: wgt::ShaderLocation, + offset: wgt::BufferAddress, + }, + #[error("strip index format was not set to None but to {strip_index_format:?} while using the non-strip topology {topology:?}")] + StripIndexFormatForNonStripTopology { + strip_index_format: Option<wgt::IndexFormat>, + topology: wgt::PrimitiveTopology, + }, + #[error("Conservative Rasterization is only supported for wgt::PolygonMode::Fill")] + ConservativeRasterizationNonFillPolygonMode, + #[error(transparent)] + MissingFeatures(#[from] MissingFeatures), + #[error(transparent)] + MissingDownlevelFlags(#[from] MissingDownlevelFlags), + #[error("error matching {stage:?} shader requirements against the pipeline")] + Stage { + stage: wgt::ShaderStages, + #[source] + error: validation::StageError, + }, + #[error("Internal error in {stage:?} shader: {error}")] + Internal { + stage: wgt::ShaderStages, + error: String, + }, +} + +bitflags::bitflags! { + #[repr(transparent)] + pub struct PipelineFlags: u32 { + const BLEND_CONSTANT = 1 << 0; + const STENCIL_REFERENCE = 1 << 1; + const WRITES_DEPTH = 1 << 2; + const WRITES_STENCIL = 1 << 3; + } +} + +/// How a render pipeline will retrieve attributes from a particular vertex buffer. 
+#[derive(Clone, Copy, Debug)] +pub struct VertexStep { + /// The byte stride in the buffer between one attribute value and the next. + pub stride: wgt::BufferAddress, + + /// Whether the buffer is indexed by vertex number or instance number. + pub mode: wgt::VertexStepMode, +} + +impl Default for VertexStep { + fn default() -> Self { + Self { + stride: 0, + mode: wgt::VertexStepMode::Vertex, + } + } +} + +#[derive(Debug)] +pub struct RenderPipeline<A: hal::Api> { + pub(crate) raw: A::RenderPipeline, + pub(crate) layout_id: Stored<PipelineLayoutId>, + pub(crate) device_id: Stored<DeviceId>, + pub(crate) pass_context: RenderPassContext, + pub(crate) flags: PipelineFlags, + pub(crate) strip_index_format: Option<wgt::IndexFormat>, + pub(crate) vertex_steps: Vec<VertexStep>, + pub(crate) late_sized_buffer_groups: ArrayVec<LateSizedBufferGroup, { hal::MAX_BIND_GROUPS }>, + pub(crate) life_guard: LifeGuard, +} + +impl<A: hal::Api> Resource for RenderPipeline<A> { + const TYPE: &'static str = "RenderPipeline"; + + fn life_guard(&self) -> &LifeGuard { + &self.life_guard + } +} diff --git a/third_party/rust/wgpu-core/src/present.rs b/third_party/rust/wgpu-core/src/present.rs new file mode 100644 index 0000000000..41da2fa585 --- /dev/null +++ b/third_party/rust/wgpu-core/src/present.rs @@ -0,0 +1,403 @@ +/*! Presentation. + +## Lifecycle + +Whenever a submission detects the use of any surface texture, it adds it to the device +tracker for the duration of the submission (temporarily, while recording). +It's added with `UNINITIALIZED` state and transitioned into `empty()` state. +When this texture is presented, we remove it from the device tracker as well as +extract it from the hub. 
+!*/ + +use std::borrow::Borrow; + +#[cfg(feature = "trace")] +use crate::device::trace::Action; +use crate::{ + conv, + device::DeviceError, + hub::{Global, GlobalIdentityHandlerFactory, HalApi, Input, Token}, + id::{DeviceId, SurfaceId, TextureId, Valid}, + init_tracker::TextureInitTracker, + resource, track, LifeGuard, Stored, +}; + +use hal::{Queue as _, Surface as _}; +use thiserror::Error; +use wgt::SurfaceStatus as Status; + +const FRAME_TIMEOUT_MS: u32 = 1000; +pub const DESIRED_NUM_FRAMES: u32 = 3; + +#[derive(Debug)] +pub(crate) struct Presentation { + pub(crate) device_id: Stored<DeviceId>, + pub(crate) config: wgt::SurfaceConfiguration, + #[allow(unused)] + pub(crate) num_frames: u32, + pub(crate) acquired_texture: Option<Stored<TextureId>>, +} + +impl Presentation { + pub(crate) fn backend(&self) -> wgt::Backend { + crate::id::TypedId::unzip(self.device_id.value.0).2 + } +} + +#[derive(Clone, Debug, Error)] +pub enum SurfaceError { + #[error("surface is invalid")] + Invalid, + #[error("surface is not configured for presentation")] + NotConfigured, + #[error(transparent)] + Device(#[from] DeviceError), + #[error("surface image is already acquired")] + AlreadyAcquired, + #[error("acquired frame is still referenced")] + StillReferenced, +} + +#[derive(Clone, Debug, Error)] +pub enum ConfigureSurfaceError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("invalid surface")] + InvalidSurface, + #[error("`SurfaceOutput` must be dropped before a new `Surface` is made")] + PreviousOutputExists, + #[error("Both `Surface` width and height must be non-zero. 
Wait to recreate the `Surface` until the window has non-zero area.")] + ZeroArea, + #[error("surface does not support the adapter's queue family")] + UnsupportedQueueFamily, + #[error("requested format {requested:?} is not in list of supported formats: {available:?}")] + UnsupportedFormat { + requested: wgt::TextureFormat, + available: Vec<wgt::TextureFormat>, + }, + #[error("requested present mode {requested:?} is not in the list of supported present modes: {available:?}")] + UnsupportedPresentMode { + requested: wgt::PresentMode, + available: Vec<wgt::PresentMode>, + }, + #[error("requested alpha mode {requested:?} is not in the list of supported alpha modes: {available:?}")] + UnsupportedAlphaMode { + requested: wgt::CompositeAlphaMode, + available: Vec<wgt::CompositeAlphaMode>, + }, + #[error("requested usage is not supported")] + UnsupportedUsage, +} + +#[repr(C)] +#[derive(Debug)] +pub struct SurfaceOutput { + pub status: Status, + pub texture_id: Option<TextureId>, +} + +impl<G: GlobalIdentityHandlerFactory> Global<G> { + pub fn surface_get_current_texture<A: HalApi>( + &self, + surface_id: SurfaceId, + texture_id_in: Input<G, TextureId>, + ) -> Result<SurfaceOutput, SurfaceError> { + profiling::scope!("SwapChain::get_next_texture"); + + let hub = A::hub(self); + let mut token = Token::root(); + let fid = hub.textures.prepare(texture_id_in); + + let (mut surface_guard, mut token) = self.surfaces.write(&mut token); + let surface = surface_guard + .get_mut(surface_id) + .map_err(|_| SurfaceError::Invalid)?; + let (device_guard, mut token) = hub.devices.read(&mut token); + + let (device, config) = match surface.presentation { + Some(ref present) => { + let device = &device_guard[present.device_id.value]; + (device, present.config.clone()) + } + None => return Err(SurfaceError::NotConfigured), + }; + + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + trace.lock().add(Action::GetSurfaceTexture { + id: fid.id(), + parent_id: surface_id, + }); 
+ } + #[cfg(not(feature = "trace"))] + let _ = device; + + let suf = A::get_surface_mut(surface); + let (texture_id, status) = match unsafe { + suf.unwrap() + .raw + .acquire_texture(Some(std::time::Duration::from_millis( + FRAME_TIMEOUT_MS as u64, + ))) + } { + Ok(Some(ast)) => { + let clear_view_desc = hal::TextureViewDescriptor { + label: Some("(wgpu internal) clear surface texture view"), + format: config.format, + dimension: wgt::TextureViewDimension::D2, + usage: hal::TextureUses::COLOR_TARGET, + range: wgt::ImageSubresourceRange::default(), + }; + let mut clear_views = smallvec::SmallVec::new(); + clear_views.push( + unsafe { + hal::Device::create_texture_view( + &device.raw, + ast.texture.borrow(), + &clear_view_desc, + ) + } + .map_err(DeviceError::from)?, + ); + + let present = surface.presentation.as_mut().unwrap(); + let texture = resource::Texture { + inner: resource::TextureInner::Surface { + raw: ast.texture, + parent_id: Valid(surface_id), + has_work: false, + }, + device_id: present.device_id.clone(), + desc: wgt::TextureDescriptor { + label: (), + size: wgt::Extent3d { + width: config.width, + height: config.height, + depth_or_array_layers: 1, + }, + sample_count: 1, + mip_level_count: 1, + format: config.format, + dimension: wgt::TextureDimension::D2, + usage: config.usage, + }, + hal_usage: conv::map_texture_usage(config.usage, config.format.into()), + format_features: wgt::TextureFormatFeatures { + allowed_usages: wgt::TextureUsages::RENDER_ATTACHMENT, + flags: wgt::TextureFormatFeatureFlags::MULTISAMPLE_X4 + | wgt::TextureFormatFeatureFlags::MULTISAMPLE_RESOLVE, + }, + initialization_status: TextureInitTracker::new(1, 1), + full_range: track::TextureSelector { + layers: 0..1, + mips: 0..1, + }, + life_guard: LifeGuard::new("<Surface>"), + clear_mode: resource::TextureClearMode::RenderPass { + clear_views, + is_color: true, + }, + }; + + let ref_count = texture.life_guard.add_ref(); + let id = fid.assign(texture, &mut token); + + { + // 
register it in the device tracker as uninitialized + let mut trackers = device.trackers.lock(); + trackers.textures.insert_single( + id.0, + ref_count.clone(), + hal::TextureUses::UNINITIALIZED, + ); + } + + if present.acquired_texture.is_some() { + return Err(SurfaceError::AlreadyAcquired); + } + present.acquired_texture = Some(Stored { + value: id, + ref_count, + }); + + let status = if ast.suboptimal { + Status::Suboptimal + } else { + Status::Good + }; + (Some(id.0), status) + } + Ok(None) => (None, Status::Timeout), + Err(err) => ( + None, + match err { + hal::SurfaceError::Lost => Status::Lost, + hal::SurfaceError::Device(err) => { + return Err(DeviceError::from(err).into()); + } + hal::SurfaceError::Outdated => Status::Outdated, + hal::SurfaceError::Other(msg) => { + log::error!("acquire error: {}", msg); + Status::Lost + } + }, + ), + }; + + Ok(SurfaceOutput { status, texture_id }) + } + + pub fn surface_present<A: HalApi>( + &self, + surface_id: SurfaceId, + ) -> Result<Status, SurfaceError> { + profiling::scope!("SwapChain::present"); + + let hub = A::hub(self); + let mut token = Token::root(); + + let (mut surface_guard, mut token) = self.surfaces.write(&mut token); + let surface = surface_guard + .get_mut(surface_id) + .map_err(|_| SurfaceError::Invalid)?; + let (mut device_guard, mut token) = hub.devices.write(&mut token); + + let present = match surface.presentation { + Some(ref mut present) => present, + None => return Err(SurfaceError::NotConfigured), + }; + + let device = &mut device_guard[present.device_id.value]; + + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + trace.lock().add(Action::Present(surface_id)); + } + + let result = { + let texture_id = present + .acquired_texture + .take() + .ok_or(SurfaceError::AlreadyAcquired)?; + + // The texture ID got added to the device tracker by `submit()`, + // and now we are moving it away. 
+ log::debug!( + "Removing swapchain texture {:?} from the device tracker", + texture_id.value + ); + device.trackers.lock().textures.remove(texture_id.value); + + let (texture, _) = hub.textures.unregister(texture_id.value.0, &mut token); + if let Some(texture) = texture { + if let resource::TextureClearMode::RenderPass { clear_views, .. } = + texture.clear_mode + { + for clear_view in clear_views { + unsafe { + hal::Device::destroy_texture_view(&device.raw, clear_view); + } + } + } + + let suf = A::get_surface_mut(surface); + match texture.inner { + resource::TextureInner::Surface { + raw, + parent_id, + has_work, + } => { + if surface_id != parent_id.0 { + log::error!("Presented frame is from a different surface"); + Err(hal::SurfaceError::Lost) + } else if !has_work { + log::error!("No work has been submitted for this frame"); + unsafe { suf.unwrap().raw.discard_texture(raw) }; + Err(hal::SurfaceError::Outdated) + } else { + unsafe { device.queue.present(&mut suf.unwrap().raw, raw) } + } + } + resource::TextureInner::Native { .. } => unreachable!(), + } + } else { + Err(hal::SurfaceError::Outdated) //TODO? + } + }; + + log::debug!("Presented. 
End of Frame"); + + match result { + Ok(()) => Ok(Status::Good), + Err(err) => match err { + hal::SurfaceError::Lost => Ok(Status::Lost), + hal::SurfaceError::Device(err) => Err(SurfaceError::from(DeviceError::from(err))), + hal::SurfaceError::Outdated => Ok(Status::Outdated), + hal::SurfaceError::Other(msg) => { + log::error!("acquire error: {}", msg); + Err(SurfaceError::Invalid) + } + }, + } + } + + pub fn surface_texture_discard<A: HalApi>( + &self, + surface_id: SurfaceId, + ) -> Result<(), SurfaceError> { + profiling::scope!("SwapChain::discard"); + + let hub = A::hub(self); + let mut token = Token::root(); + + let (mut surface_guard, mut token) = self.surfaces.write(&mut token); + let surface = surface_guard + .get_mut(surface_id) + .map_err(|_| SurfaceError::Invalid)?; + let (mut device_guard, mut token) = hub.devices.write(&mut token); + + let present = match surface.presentation { + Some(ref mut present) => present, + None => return Err(SurfaceError::NotConfigured), + }; + + let device = &mut device_guard[present.device_id.value]; + + #[cfg(feature = "trace")] + if let Some(ref trace) = device.trace { + trace.lock().add(Action::DiscardSurfaceTexture(surface_id)); + } + + { + let texture_id = present + .acquired_texture + .take() + .ok_or(SurfaceError::AlreadyAcquired)?; + + // The texture ID got added to the device tracker by `submit()`, + // and now we are moving it away. + device.trackers.lock().textures.remove(texture_id.value); + + let (texture, _) = hub.textures.unregister(texture_id.value.0, &mut token); + if let Some(texture) = texture { + let suf = A::get_surface_mut(surface); + match texture.inner { + resource::TextureInner::Surface { + raw, + parent_id, + has_work: _, + } => { + if surface_id == parent_id.0 { + unsafe { suf.unwrap().raw.discard_texture(raw) }; + } else { + log::warn!("Surface texture is outdated"); + } + } + resource::TextureInner::Native { .. 
} => unreachable!(), + } + } + } + + Ok(()) + } +} diff --git a/third_party/rust/wgpu-core/src/resource.rs b/third_party/rust/wgpu-core/src/resource.rs new file mode 100644 index 0000000000..e59ec0aaa1 --- /dev/null +++ b/third_party/rust/wgpu-core/src/resource.rs @@ -0,0 +1,757 @@ +use crate::{ + device::{DeviceError, HostMap, MissingDownlevelFlags, MissingFeatures}, + hub::{Global, GlobalIdentityHandlerFactory, HalApi, Resource, Token}, + id::{AdapterId, DeviceId, SurfaceId, TextureId, Valid}, + init_tracker::{BufferInitTracker, TextureInitTracker}, + track::TextureSelector, + validation::MissingBufferUsageError, + Label, LifeGuard, RefCount, Stored, +}; + +use smallvec::SmallVec; +use thiserror::Error; + +use std::{borrow::Borrow, num::NonZeroU8, ops::Range, ptr::NonNull}; + +/// The status code provided to the buffer mapping callback. +/// +/// This is very similar to `BufferAccessResult`, except that this is FFI-friendly. +#[repr(C)] +#[derive(Debug)] +pub enum BufferMapAsyncStatus { + /// The Buffer is sucessfully mapped, `get_mapped_range` can be called. + /// + /// All other variants of this enum represent failures to map the buffer. + Success, + /// The buffer is already mapped. + /// + /// While this is treated as an error, it does not prevent mapped range from being accessed. + AlreadyMapped, + /// Mapping was already requested. + MapAlreadyPending, + /// An unknown error. + Error, + /// Mapping was aborted (by unmapping or destroying the buffer before mapping + /// happened). + Aborted, + /// The context is Lost. + ContextLost, + /// The buffer is in an invalid state. + Invalid, + /// The range isn't fully contained in the buffer. + InvalidRange, + /// The range isn't properly aligned. + InvalidAlignment, + /// Incompatible usage flags. + InvalidUsageFlags, +} + +pub(crate) enum BufferMapState<A: hal::Api> { + /// Mapped at creation. 
+ Init { + ptr: NonNull<u8>, + stage_buffer: A::Buffer, + needs_flush: bool, + }, + /// Waiting for GPU to be done before mapping + Waiting(BufferPendingMapping), + /// Mapped + Active { + ptr: NonNull<u8>, + range: hal::MemoryRange, + host: HostMap, + }, + /// Not mapped + Idle, +} + +unsafe impl<A: hal::Api> Send for BufferMapState<A> {} +unsafe impl<A: hal::Api> Sync for BufferMapState<A> {} + +#[repr(C)] +pub struct BufferMapCallbackC { + pub callback: unsafe extern "C" fn(status: BufferMapAsyncStatus, user_data: *mut u8), + pub user_data: *mut u8, +} + +unsafe impl Send for BufferMapCallbackC {} + +pub struct BufferMapCallback { + // We wrap this so creating the enum in the C variant can be unsafe, + // allowing our call function to be safe. + inner: Option<BufferMapCallbackInner>, +} + +enum BufferMapCallbackInner { + Rust { + callback: Box<dyn FnOnce(BufferAccessResult) + Send + 'static>, + }, + C { + inner: BufferMapCallbackC, + }, +} + +impl BufferMapCallback { + pub fn from_rust(callback: Box<dyn FnOnce(BufferAccessResult) + Send + 'static>) -> Self { + Self { + inner: Some(BufferMapCallbackInner::Rust { callback }), + } + } + + /// # Safety + /// + /// - The callback pointer must be valid to call with the provided user_data + /// pointer. + /// + /// - Both pointers must point to valid memory until the callback is + /// invoked, which may happen at an unspecified time. + pub unsafe fn from_c(inner: BufferMapCallbackC) -> Self { + Self { + inner: Some(BufferMapCallbackInner::C { inner }), + } + } + + pub(crate) fn call(mut self, result: BufferAccessResult) { + match self.inner.take() { + Some(BufferMapCallbackInner::Rust { callback }) => { + callback(result); + } + // SAFETY: the contract of the call to from_c says that this unsafe is sound. 
+ Some(BufferMapCallbackInner::C { inner }) => unsafe { + let status = match result { + Ok(()) => BufferMapAsyncStatus::Success, + Err(BufferAccessError::Device(_)) => BufferMapAsyncStatus::ContextLost, + Err(BufferAccessError::Invalid) | Err(BufferAccessError::Destroyed) => { + BufferMapAsyncStatus::Invalid + } + Err(BufferAccessError::AlreadyMapped) => BufferMapAsyncStatus::AlreadyMapped, + Err(BufferAccessError::MapAlreadyPending) => { + BufferMapAsyncStatus::MapAlreadyPending + } + Err(BufferAccessError::MissingBufferUsage(_)) => { + BufferMapAsyncStatus::InvalidUsageFlags + } + Err(BufferAccessError::UnalignedRange) + | Err(BufferAccessError::UnalignedRangeSize { .. }) + | Err(BufferAccessError::UnalignedOffset { .. }) => { + BufferMapAsyncStatus::InvalidAlignment + } + Err(BufferAccessError::OutOfBoundsUnderrun { .. }) + | Err(BufferAccessError::OutOfBoundsOverrun { .. }) + | Err(BufferAccessError::NegativeRange { .. }) => { + BufferMapAsyncStatus::InvalidRange + } + Err(_) => BufferMapAsyncStatus::Error, + }; + + (inner.callback)(status, inner.user_data); + }, + None => { + panic!("Map callback invoked twice"); + } + } + } +} + +impl Drop for BufferMapCallback { + fn drop(&mut self) { + if self.inner.is_some() { + panic!("Map callback was leaked"); + } + } +} + +pub struct BufferMapOperation { + pub host: HostMap, + pub callback: BufferMapCallback, +} + +#[derive(Clone, Debug, Error)] +pub enum BufferAccessError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("buffer map failed")] + Failed, + #[error("buffer is invalid")] + Invalid, + #[error("buffer is destroyed")] + Destroyed, + #[error("buffer is already mapped")] + AlreadyMapped, + #[error("buffer map is pending")] + MapAlreadyPending, + #[error(transparent)] + MissingBufferUsage(#[from] MissingBufferUsageError), + #[error("buffer is not mapped")] + NotMapped, + #[error( + "buffer map range must start aligned to `MAP_ALIGNMENT` and end to `COPY_BUFFER_ALIGNMENT`" + )] + UnalignedRange, 
+ #[error("buffer offset invalid: offset {offset} must be multiple of 8")] + UnalignedOffset { offset: wgt::BufferAddress }, + #[error("buffer range size invalid: range_size {range_size} must be multiple of 4")] + UnalignedRangeSize { range_size: wgt::BufferAddress }, + #[error("buffer access out of bounds: index {index} would underrun the buffer (limit: {min})")] + OutOfBoundsUnderrun { + index: wgt::BufferAddress, + min: wgt::BufferAddress, + }, + #[error( + "buffer access out of bounds: last index {index} would overrun the buffer (limit: {max})" + )] + OutOfBoundsOverrun { + index: wgt::BufferAddress, + max: wgt::BufferAddress, + }, + #[error("buffer map range start {start} is greater than end {end}")] + NegativeRange { + start: wgt::BufferAddress, + end: wgt::BufferAddress, + }, + #[error("buffer map aborted")] + MapAborted, +} + +pub type BufferAccessResult = Result<(), BufferAccessError>; +pub(crate) struct BufferPendingMapping { + pub range: Range<wgt::BufferAddress>, + pub op: BufferMapOperation, + // hold the parent alive while the mapping is active + pub _parent_ref_count: RefCount, +} + +pub type BufferDescriptor<'a> = wgt::BufferDescriptor<Label<'a>>; + +pub struct Buffer<A: hal::Api> { + pub(crate) raw: Option<A::Buffer>, + pub(crate) device_id: Stored<DeviceId>, + pub(crate) usage: wgt::BufferUsages, + pub(crate) size: wgt::BufferAddress, + pub(crate) initialization_status: BufferInitTracker, + pub(crate) sync_mapped_writes: Option<hal::MemoryRange>, + pub(crate) life_guard: LifeGuard, + pub(crate) map_state: BufferMapState<A>, +} + +#[derive(Clone, Debug, Error)] +pub enum CreateBufferError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("failed to map buffer while creating: {0}")] + AccessError(#[from] BufferAccessError), + #[error("buffers that are mapped at creation have to be aligned to `COPY_BUFFER_ALIGNMENT`")] + UnalignedSize, + #[error("Invalid usage flags {0:?}")] + InvalidUsage(wgt::BufferUsages), + #[error("`MAP` usage 
can only be combined with the opposite `COPY`, requested {0:?}")] + UsageMismatch(wgt::BufferUsages), + #[error("Buffer size {requested} is greater than the maximum buffer size ({maximum})")] + MaxBufferSize { requested: u64, maximum: u64 }, + #[error(transparent)] + MissingDownlevelFlags(#[from] MissingDownlevelFlags), +} + +impl<A: hal::Api> Resource for Buffer<A> { + const TYPE: &'static str = "Buffer"; + + fn life_guard(&self) -> &LifeGuard { + &self.life_guard + } +} + +/// A temporary buffer, consumed by the command that uses it. +/// +/// A [`StagingBuffer`] is designed for one-shot uploads of data to the GPU. It +/// is always created mapped, and the command that uses it destroys the buffer +/// when it is done. +/// +/// [`StagingBuffer`]s can be created with [`queue_create_staging_buffer`] and +/// used with [`queue_write_staging_buffer`]. They are also used internally by +/// operations like [`queue_write_texture`] that need to upload data to the GPU, +/// but that don't belong to any particular wgpu command buffer. +/// +/// Used `StagingBuffer`s are accumulated in [`Device::pending_writes`], to be +/// freed once their associated operation's queue submission has finished +/// execution. 
+/// +/// [`queue_create_staging_buffer`]: Global::queue_create_staging_buffer +/// [`queue_write_staging_buffer`]: Global::queue_write_staging_buffer +/// [`queue_write_texture`]: Global::queue_write_texture +/// [`Device::pending_writes`]: crate::device::Device +pub struct StagingBuffer<A: hal::Api> { + pub(crate) raw: A::Buffer, + pub(crate) size: wgt::BufferAddress, + pub(crate) is_coherent: bool, +} + +impl<A: hal::Api> Resource for StagingBuffer<A> { + const TYPE: &'static str = "StagingBuffer"; + + fn life_guard(&self) -> &LifeGuard { + unreachable!() + } + + fn label(&self) -> &str { + "<StagingBuffer>" + } +} + +pub type TextureDescriptor<'a> = wgt::TextureDescriptor<Label<'a>>; + +#[derive(Debug)] +pub(crate) enum TextureInner<A: hal::Api> { + Native { + raw: Option<A::Texture>, + }, + Surface { + raw: A::SurfaceTexture, + parent_id: Valid<SurfaceId>, + has_work: bool, + }, +} + +impl<A: hal::Api> TextureInner<A> { + pub fn as_raw(&self) -> Option<&A::Texture> { + match *self { + Self::Native { raw: Some(ref tex) } => Some(tex), + Self::Native { raw: None } => None, + Self::Surface { ref raw, .. } => Some(raw.borrow()), + } + } +} + +#[derive(Debug)] +pub enum TextureClearMode<A: hal::Api> { + BufferCopy, + // View for clear via RenderPass for every subsurface (mip/layer/slice) + RenderPass { + clear_views: SmallVec<[A::TextureView; 1]>, + is_color: bool, + }, + // Texture can't be cleared, attempting to do so will cause panic. 
+ // (either because it is impossible for the type of texture or it is being destroyed) + None, +} + +#[derive(Debug)] +pub struct Texture<A: hal::Api> { + pub(crate) inner: TextureInner<A>, + pub(crate) device_id: Stored<DeviceId>, + pub(crate) desc: wgt::TextureDescriptor<()>, + pub(crate) hal_usage: hal::TextureUses, + pub(crate) format_features: wgt::TextureFormatFeatures, + pub(crate) initialization_status: TextureInitTracker, + pub(crate) full_range: TextureSelector, + pub(crate) life_guard: LifeGuard, + pub(crate) clear_mode: TextureClearMode<A>, +} + +impl<A: hal::Api> Texture<A> { + pub(crate) fn get_clear_view(&self, mip_level: u32, depth_or_layer: u32) -> &A::TextureView { + match self.clear_mode { + TextureClearMode::BufferCopy => { + panic!("Given texture is cleared with buffer copies, not render passes") + } + TextureClearMode::None => { + panic!("Given texture can't be cleared") + } + TextureClearMode::RenderPass { + ref clear_views, .. + } => { + let index = if self.desc.dimension == wgt::TextureDimension::D3 { + (0..mip_level).fold(0, |acc, mip| { + acc + (self.desc.size.depth_or_array_layers >> mip).max(1) + }) + } else { + mip_level * self.desc.size.depth_or_array_layers + } + depth_or_layer; + &clear_views[index as usize] + } + } + } +} + +impl<G: GlobalIdentityHandlerFactory> Global<G> { + /// # Safety + /// + /// - The raw texture handle must not be manually destroyed + pub unsafe fn texture_as_hal<A: HalApi, F: FnOnce(Option<&A::Texture>)>( + &self, + id: TextureId, + hal_texture_callback: F, + ) { + profiling::scope!("Texture::as_hal"); + + let hub = A::hub(self); + let mut token = Token::root(); + let (guard, _) = hub.textures.read(&mut token); + let texture = guard.try_get(id).ok().flatten(); + let hal_texture = texture.and_then(|tex| tex.inner.as_raw()); + + hal_texture_callback(hal_texture); + } + + /// # Safety + /// + /// - The raw adapter handle must not be manually destroyed + pub unsafe fn adapter_as_hal<A: HalApi, F: 
FnOnce(Option<&A::Adapter>) -> R, R>( + &self, + id: AdapterId, + hal_adapter_callback: F, + ) -> R { + profiling::scope!("Adapter::as_hal"); + + let hub = A::hub(self); + let mut token = Token::root(); + + let (guard, _) = hub.adapters.read(&mut token); + let adapter = guard.try_get(id).ok().flatten(); + let hal_adapter = adapter.map(|adapter| &adapter.raw.adapter); + + hal_adapter_callback(hal_adapter) + } + + /// # Safety + /// + /// - The raw device handle must not be manually destroyed + pub unsafe fn device_as_hal<A: HalApi, F: FnOnce(Option<&A::Device>) -> R, R>( + &self, + id: DeviceId, + hal_device_callback: F, + ) -> R { + profiling::scope!("Device::as_hal"); + + let hub = A::hub(self); + let mut token = Token::root(); + let (guard, _) = hub.devices.read(&mut token); + let device = guard.try_get(id).ok().flatten(); + let hal_device = device.map(|device| &device.raw); + + hal_device_callback(hal_device) + } + + /// # Safety + /// - The raw surface handle must not be manually destroyed + pub unsafe fn surface_as_hal_mut<A: HalApi, F: FnOnce(Option<&mut A::Surface>) -> R, R>( + &self, + id: SurfaceId, + hal_surface_callback: F, + ) -> R { + profiling::scope!("Surface::as_hal_mut"); + + let mut token = Token::root(); + let (mut guard, _) = self.surfaces.write(&mut token); + let surface = guard.get_mut(id).ok(); + let hal_surface = surface + .and_then(|surface| A::get_surface_mut(surface)) + .map(|surface| &mut surface.raw); + + hal_surface_callback(hal_surface) + } +} + +#[derive(Clone, Copy, Debug)] +pub enum TextureErrorDimension { + X, + Y, + Z, +} + +#[derive(Clone, Debug, Error)] +pub enum TextureDimensionError { + #[error("Dimension {0:?} is zero")] + Zero(TextureErrorDimension), + #[error("Dimension {dim:?} value {given} exceeds the limit of {limit}")] + LimitExceeded { + dim: TextureErrorDimension, + given: u32, + limit: u32, + }, + #[error("Sample count {0} is invalid")] + InvalidSampleCount(u32), + #[error("Width {width} is not a multiple of 
{format:?}'s block width ({block_width})")] + NotMultipleOfBlockWidth { + width: u32, + block_width: u32, + format: wgt::TextureFormat, + }, + #[error("Height {height} is not a multiple of {format:?}'s block height ({block_height})")] + NotMultipleOfBlockHeight { + height: u32, + block_height: u32, + format: wgt::TextureFormat, + }, + #[error("Multisampled texture depth or array layers must be 1, got {0}")] + MultisampledDepthOrArrayLayer(u32), +} + +#[derive(Clone, Debug, Error)] +pub enum CreateTextureError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("Invalid usage flags {0:?}")] + InvalidUsage(wgt::TextureUsages), + #[error(transparent)] + InvalidDimension(#[from] TextureDimensionError), + #[error("Depth texture ({1:?}) can't be created as {0:?}")] + InvalidDepthDimension(wgt::TextureDimension, wgt::TextureFormat), + #[error("Compressed texture ({1:?}) can't be created as {0:?}")] + InvalidCompressedDimension(wgt::TextureDimension, wgt::TextureFormat), + #[error( + "Texture descriptor mip level count {requested} is invalid, maximum allowed is {maximum}" + )] + InvalidMipLevelCount { requested: u32, maximum: u32 }, + #[error( + "Texture usages {0:?} are not allowed on a texture of type {1:?}{}", + if *.2 { " due to downlevel restrictions" } else { "" } + )] + InvalidFormatUsages(wgt::TextureUsages, wgt::TextureFormat, bool), + #[error("Texture usages {0:?} are not allowed on a texture of dimensions {1:?}")] + InvalidDimensionUsages(wgt::TextureUsages, wgt::TextureDimension), + #[error("Texture usage STORAGE_BINDING is not allowed for multisampled textures")] + InvalidMultisampledStorageBinding, + #[error("Format {0:?} does not support multisampling")] + InvalidMultisampledFormat(wgt::TextureFormat), + #[error("Multisampled textures must have RENDER_ATTACHMENT usage")] + MultisampledNotRenderAttachment, + #[error("Texture format {0:?} can't be used due to missing features.")] + MissingFeatures(wgt::TextureFormat, #[source] MissingFeatures), 
+ #[error("Sample count {0} is not supported by format {1:?} on this device. It may be supported by your adapter through the TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES feature.")] + InvalidSampleCount(u32, wgt::TextureFormat), +} + +impl<A: hal::Api> Resource for Texture<A> { + const TYPE: &'static str = "Texture"; + + fn life_guard(&self) -> &LifeGuard { + &self.life_guard + } +} + +impl<A: hal::Api> Borrow<TextureSelector> for Texture<A> { + fn borrow(&self) -> &TextureSelector { + &self.full_range + } +} + +/// Describes a [`TextureView`]. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +#[cfg_attr(feature = "trace", derive(serde::Serialize))] +#[cfg_attr(feature = "replay", derive(serde::Deserialize), serde(default))] +pub struct TextureViewDescriptor<'a> { + /// Debug label of the texture view. + /// + /// This will show up in graphics debuggers for easy identification. + pub label: Label<'a>, + /// Format of the texture view, or `None` for the same format as the texture + /// itself. + /// + /// At this time, it must be the same the underlying format of the texture. + pub format: Option<wgt::TextureFormat>, + /// The dimension of the texture view. + /// + /// - For 1D textures, this must be `D1`. + /// - For 2D textures it must be one of `D2`, `D2Array`, `Cube`, or `CubeArray`. + /// - For 3D textures it must be `D3`. + pub dimension: Option<wgt::TextureViewDimension>, + /// Range within the texture that is accessible via this view. 
+ pub range: wgt::ImageSubresourceRange, +} + +#[derive(Debug)] +pub(crate) struct HalTextureViewDescriptor { + pub format: wgt::TextureFormat, + pub dimension: wgt::TextureViewDimension, + pub range: wgt::ImageSubresourceRange, +} + +impl HalTextureViewDescriptor { + pub fn aspects(&self) -> hal::FormatAspects { + hal::FormatAspects::from(self.format) & hal::FormatAspects::from(self.range.aspect) + } +} + +#[derive(Debug)] +pub struct TextureView<A: hal::Api> { + pub(crate) raw: A::TextureView, + // The parent's refcount is held alive, but the parent may still be deleted + // if it's a surface texture. TODO: make this cleaner. + pub(crate) parent_id: Stored<TextureId>, + pub(crate) device_id: Stored<DeviceId>, + //TODO: store device_id for quick access? + pub(crate) desc: HalTextureViewDescriptor, + pub(crate) format_features: wgt::TextureFormatFeatures, + pub(crate) extent: wgt::Extent3d, + pub(crate) samples: u32, + pub(crate) selector: TextureSelector, + pub(crate) life_guard: LifeGuard, +} + +#[derive(Clone, Debug, Error)] +pub enum CreateTextureViewError { + #[error("parent texture is invalid or destroyed")] + InvalidTexture, + #[error("not enough memory left")] + OutOfMemory, + #[error("Invalid texture view dimension `{view:?}` with texture of dimension `{texture:?}`")] + InvalidTextureViewDimension { + view: wgt::TextureViewDimension, + texture: wgt::TextureDimension, + }, + #[error("Invalid texture view dimension `{0:?}` of a multisampled texture")] + InvalidMultisampledTextureViewDimension(wgt::TextureViewDimension), + #[error("Invalid texture depth `{depth}` for texture view of dimension `Cubemap`. Cubemap views must use images of size 6.")] + InvalidCubemapTextureDepth { depth: u32 }, + #[error("Invalid texture depth `{depth}` for texture view of dimension `CubemapArray`. 
Cubemap views must use images with sizes which are a multiple of 6.")] + InvalidCubemapArrayTextureDepth { depth: u32 }, + #[error( + "TextureView mip level count + base mip level {requested} must be <= Texture mip level count {total}" + )] + TooManyMipLevels { requested: u32, total: u32 }, + #[error("TextureView array layer count + base array layer {requested} must be <= Texture depth/array layer count {total}")] + TooManyArrayLayers { requested: u32, total: u32 }, + #[error("Requested array layer count {requested} is not valid for the target view dimension {dim:?}")] + InvalidArrayLayerCount { + requested: u32, + dim: wgt::TextureViewDimension, + }, + #[error("Aspect {requested_aspect:?} is not in the source texture format {texture_format:?}")] + InvalidAspect { + texture_format: wgt::TextureFormat, + requested_aspect: wgt::TextureAspect, + }, + #[error("Unable to view texture {texture:?} as {view:?}")] + FormatReinterpretation { + texture: wgt::TextureFormat, + view: wgt::TextureFormat, + }, +} + +#[derive(Clone, Debug, Error)] +pub enum TextureViewDestroyError {} + +impl<A: hal::Api> Resource for TextureView<A> { + const TYPE: &'static str = "TextureView"; + + fn life_guard(&self) -> &LifeGuard { + &self.life_guard + } +} + +/// Describes a [`Sampler`] +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr(feature = "trace", derive(serde::Serialize))] +#[cfg_attr(feature = "replay", derive(serde::Deserialize))] +pub struct SamplerDescriptor<'a> { + /// Debug label of the sampler. + /// + /// This will show up in graphics debuggers for easy identification. + pub label: Label<'a>, + /// How to deal with out of bounds accesses in the u (i.e. 
x) direction + pub address_modes: [wgt::AddressMode; 3], + /// How to filter the texture when it needs to be magnified (made larger) + pub mag_filter: wgt::FilterMode, + /// How to filter the texture when it needs to be minified (made smaller) + pub min_filter: wgt::FilterMode, + /// How to filter between mip map levels + pub mipmap_filter: wgt::FilterMode, + /// Minimum level of detail (i.e. mip level) to use + pub lod_min_clamp: f32, + /// Maximum level of detail (i.e. mip level) to use + pub lod_max_clamp: f32, + /// If this is enabled, this is a comparison sampler using the given comparison function. + pub compare: Option<wgt::CompareFunction>, + /// Valid values: 1, 2, 4, 8, and 16. + pub anisotropy_clamp: Option<NonZeroU8>, + /// Border color to use when address_mode is + /// [`AddressMode::ClampToBorder`](wgt::AddressMode::ClampToBorder) + pub border_color: Option<wgt::SamplerBorderColor>, +} + +impl Default for SamplerDescriptor<'_> { + fn default() -> Self { + Self { + label: None, + address_modes: Default::default(), + mag_filter: Default::default(), + min_filter: Default::default(), + mipmap_filter: Default::default(), + lod_min_clamp: 0.0, + lod_max_clamp: std::f32::MAX, + compare: None, + anisotropy_clamp: None, + border_color: None, + } + } +} + +#[derive(Debug)] +pub struct Sampler<A: hal::Api> { + pub(crate) raw: A::Sampler, + pub(crate) device_id: Stored<DeviceId>, + pub(crate) life_guard: LifeGuard, + /// `true` if this is a comparison sampler + pub(crate) comparison: bool, + /// `true` if this is a filtering sampler + pub(crate) filtering: bool, +} + +#[derive(Clone, Debug, Error)] +pub enum CreateSamplerError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("invalid anisotropic clamp {0}, must be one of 1, 2, 4, 8 or 16")] + InvalidClamp(u8), + #[error("cannot create any more samplers")] + TooManyObjects, + /// AddressMode::ClampToBorder requires feature ADDRESS_MODE_CLAMP_TO_BORDER. 
+ #[error(transparent)] + MissingFeatures(#[from] MissingFeatures), +} + +impl<A: hal::Api> Resource for Sampler<A> { + const TYPE: &'static str = "Sampler"; + + fn life_guard(&self) -> &LifeGuard { + &self.life_guard + } +} + +#[derive(Clone, Debug, Error)] +pub enum CreateQuerySetError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("QuerySets cannot be made with zero queries")] + ZeroCount, + #[error("{count} is too many queries for a single QuerySet. QuerySets cannot be made more than {maximum} queries.")] + TooManyQueries { count: u32, maximum: u32 }, + #[error(transparent)] + MissingFeatures(#[from] MissingFeatures), +} + +pub type QuerySetDescriptor<'a> = wgt::QuerySetDescriptor<Label<'a>>; + +#[derive(Debug)] +pub struct QuerySet<A: hal::Api> { + pub(crate) raw: A::QuerySet, + pub(crate) device_id: Stored<DeviceId>, + pub(crate) life_guard: LifeGuard, + pub(crate) desc: wgt::QuerySetDescriptor<()>, +} + +impl<A: hal::Api> Resource for QuerySet<A> { + const TYPE: &'static str = "QuerySet"; + + fn life_guard(&self) -> &LifeGuard { + &self.life_guard + } +} + +#[derive(Clone, Debug, Error)] +pub enum DestroyError { + #[error("resource is invalid")] + Invalid, + #[error("resource is already destroyed")] + AlreadyDestroyed, +} diff --git a/third_party/rust/wgpu-core/src/track/buffer.rs b/third_party/rust/wgpu-core/src/track/buffer.rs new file mode 100644 index 0000000000..b7682968b2 --- /dev/null +++ b/third_party/rust/wgpu-core/src/track/buffer.rs @@ -0,0 +1,791 @@ +/*! Buffer Trackers + * + * Buffers are represented by a single state for the whole resource, + * a 16 bit bitflag of buffer usages. Because there is only ever + * one subresource, they have no selector. 
+!*/ + +use std::{borrow::Cow, marker::PhantomData, vec::Drain}; + +use super::PendingTransition; +use crate::{ + hub, + id::{BufferId, TypedId, Valid}, + resource::Buffer, + track::{ + invalid_resource_state, skip_barrier, ResourceMetadata, ResourceMetadataProvider, + ResourceUses, UsageConflict, + }, + LifeGuard, RefCount, +}; +use hal::BufferUses; +use wgt::{strict_assert, strict_assert_eq}; + +impl ResourceUses for BufferUses { + const EXCLUSIVE: Self = Self::EXCLUSIVE; + + type Id = BufferId; + type Selector = (); + + fn bits(self) -> u16 { + Self::bits(&self) + } + + fn all_ordered(self) -> bool { + Self::ORDERED.contains(self) + } + + fn any_exclusive(self) -> bool { + self.intersects(Self::EXCLUSIVE) + } +} + +/// Stores all the buffers that a bind group stores. +pub(crate) struct BufferBindGroupState<A: hub::HalApi> { + buffers: Vec<(Valid<BufferId>, RefCount, BufferUses)>, + + _phantom: PhantomData<A>, +} +impl<A: hub::HalApi> BufferBindGroupState<A> { + pub fn new() -> Self { + Self { + buffers: Vec::new(), + + _phantom: PhantomData, + } + } + + /// Optimize the buffer bind group state by sorting it by ID. + /// + /// When this list of states is merged into a tracker, the memory + /// accesses will be in a constant assending order. + pub(crate) fn optimize(&mut self) { + self.buffers + .sort_unstable_by_key(|&(id, _, _)| id.0.unzip().0); + } + + /// Returns a list of all buffers tracked. May contain duplicates. + pub fn used(&self) -> impl Iterator<Item = Valid<BufferId>> + '_ { + self.buffers.iter().map(|&(id, _, _)| id) + } + + /// Adds the given resource with the given state. + pub fn add_single<'a>( + &mut self, + storage: &'a hub::Storage<Buffer<A>, BufferId>, + id: BufferId, + state: BufferUses, + ) -> Option<&'a Buffer<A>> { + let buffer = storage.get(id).ok()?; + + self.buffers + .push((Valid(id), buffer.life_guard.add_ref(), state)); + + Some(buffer) + } +} + +/// Stores all buffer state within a single usage scope. 
+#[derive(Debug)] +pub(crate) struct BufferUsageScope<A: hub::HalApi> { + state: Vec<BufferUses>, + + metadata: ResourceMetadata<A>, +} + +impl<A: hub::HalApi> BufferUsageScope<A> { + pub fn new() -> Self { + Self { + state: Vec::new(), + + metadata: ResourceMetadata::new(), + } + } + + fn tracker_assert_in_bounds(&self, index: usize) { + strict_assert!(index < self.state.len()); + self.metadata.tracker_assert_in_bounds(index); + } + + /// Sets the size of all the vectors inside the tracker. + /// + /// Must be called with the highest possible Buffer ID before + /// all unsafe functions are called. + pub fn set_size(&mut self, size: usize) { + self.state.resize(size, BufferUses::empty()); + self.metadata.set_size(size); + } + + /// Extend the vectors to let the given index be valid. + fn allow_index(&mut self, index: usize) { + if index >= self.state.len() { + self.set_size(index + 1); + } + } + + /// Returns a list of all buffers tracked. + pub fn used(&self) -> impl Iterator<Item = Valid<BufferId>> + '_ { + self.metadata.owned_ids() + } + + /// Merge the list of buffer states in the given bind group into this usage scope. + /// + /// If any of the resulting states is invalid, stops the merge and returns a usage + /// conflict with the details of the invalid state. + /// + /// Because bind groups do not check if the union of all their states is valid, + /// this method is allowed to return Err on the first bind group bound. + /// + /// # Safety + /// + /// [`Self::set_size`] must be called with the maximum possible Buffer ID before this + /// method is called. 
+ pub unsafe fn merge_bind_group( + &mut self, + bind_group: &BufferBindGroupState<A>, + ) -> Result<(), UsageConflict> { + for &(id, ref ref_count, state) in &bind_group.buffers { + let (index32, epoch, _) = id.0.unzip(); + let index = index32 as usize; + + unsafe { + insert_or_merge( + None, + None, + &mut self.state, + &mut self.metadata, + index32, + index, + BufferStateProvider::Direct { state }, + ResourceMetadataProvider::Direct { + epoch, + ref_count: Cow::Borrowed(ref_count), + }, + )? + }; + } + + Ok(()) + } + + /// Merge the list of buffer states in the given usage scope into this UsageScope. + /// + /// If any of the resulting states is invalid, stops the merge and returns a usage + /// conflict with the details of the invalid state. + /// + /// If the given tracker uses IDs higher than the length of internal vectors, + /// the vectors will be extended. A call to set_size is not needed. + pub fn merge_usage_scope(&mut self, scope: &Self) -> Result<(), UsageConflict> { + let incoming_size = scope.state.len(); + if incoming_size > self.state.len() { + self.set_size(incoming_size); + } + + for index in scope.metadata.owned_indices() { + self.tracker_assert_in_bounds(index); + scope.tracker_assert_in_bounds(index); + + unsafe { + insert_or_merge( + None, + None, + &mut self.state, + &mut self.metadata, + index as u32, + index, + BufferStateProvider::Indirect { + state: &scope.state, + }, + ResourceMetadataProvider::Indirect { + metadata: &scope.metadata, + }, + )?; + }; + } + + Ok(()) + } + + /// Merge a single state into the UsageScope. + /// + /// If the resulting state is invalid, returns a usage + /// conflict with the details of the invalid state. + /// + /// If the ID is higher than the length of internal vectors, + /// the vectors will be extended. A call to set_size is not needed. 
+ pub fn merge_single<'a>( + &mut self, + storage: &'a hub::Storage<Buffer<A>, BufferId>, + id: BufferId, + new_state: BufferUses, + ) -> Result<&'a Buffer<A>, UsageConflict> { + let buffer = storage + .get(id) + .map_err(|_| UsageConflict::BufferInvalid { id })?; + + let (index32, epoch, _) = id.unzip(); + let index = index32 as usize; + + self.allow_index(index); + + self.tracker_assert_in_bounds(index); + + unsafe { + insert_or_merge( + Some(&buffer.life_guard), + None, + &mut self.state, + &mut self.metadata, + index32, + index, + BufferStateProvider::Direct { state: new_state }, + ResourceMetadataProvider::Resource { epoch }, + )?; + } + + Ok(buffer) + } +} + +/// Stores all buffer state within a command buffer or device. +pub(crate) struct BufferTracker<A: hub::HalApi> { + start: Vec<BufferUses>, + end: Vec<BufferUses>, + + metadata: ResourceMetadata<A>, + + temp: Vec<PendingTransition<BufferUses>>, +} +impl<A: hub::HalApi> BufferTracker<A> { + pub fn new() -> Self { + Self { + start: Vec::new(), + end: Vec::new(), + + metadata: ResourceMetadata::new(), + + temp: Vec::new(), + } + } + + fn tracker_assert_in_bounds(&self, index: usize) { + strict_assert!(index < self.start.len()); + strict_assert!(index < self.end.len()); + self.metadata.tracker_assert_in_bounds(index); + } + + /// Sets the size of all the vectors inside the tracker. + /// + /// Must be called with the highest possible Buffer ID before + /// all unsafe functions are called. + pub fn set_size(&mut self, size: usize) { + self.start.resize(size, BufferUses::empty()); + self.end.resize(size, BufferUses::empty()); + + self.metadata.set_size(size); + } + + /// Extend the vectors to let the given index be valid. + fn allow_index(&mut self, index: usize) { + if index >= self.start.len() { + self.set_size(index + 1); + } + } + + /// Returns a list of all buffers tracked. 
+ pub fn used(&self) -> impl Iterator<Item = Valid<BufferId>> + '_ { + self.metadata.owned_ids() + } + + /// Drains all currently pending transitions. + pub fn drain(&mut self) -> Drain<'_, PendingTransition<BufferUses>> { + self.temp.drain(..) + } + + /// Inserts a single buffer and its state into the resource tracker. + /// + /// If the resource already exists in the tracker, this will panic. + /// + /// If the ID is higher than the length of internal vectors, + /// the vectors will be extended. A call to set_size is not needed. + pub fn insert_single(&mut self, id: Valid<BufferId>, ref_count: RefCount, state: BufferUses) { + let (index32, epoch, _) = id.0.unzip(); + let index = index32 as usize; + + self.allow_index(index); + + self.tracker_assert_in_bounds(index); + + unsafe { + let currently_owned = self.metadata.contains_unchecked(index); + + if currently_owned { + panic!("Tried to insert buffer already tracked"); + } + + insert( + None, + Some(&mut self.start), + &mut self.end, + &mut self.metadata, + index, + BufferStateProvider::Direct { state }, + None, + ResourceMetadataProvider::Direct { + epoch, + ref_count: Cow::Owned(ref_count), + }, + ) + } + } + + /// Sets the state of a single buffer. + /// + /// If a transition is needed to get the buffer into the given state, that transition + /// is returned. No more than one transition is needed. + /// + /// If the ID is higher than the length of internal vectors, + /// the vectors will be extended. A call to set_size is not needed. 
+ pub fn set_single<'a>( + &mut self, + storage: &'a hub::Storage<Buffer<A>, BufferId>, + id: BufferId, + state: BufferUses, + ) -> Option<(&'a Buffer<A>, Option<PendingTransition<BufferUses>>)> { + let value = storage.get(id).ok()?; + + let (index32, epoch, _) = id.unzip(); + let index = index32 as usize; + + self.allow_index(index); + + self.tracker_assert_in_bounds(index); + + unsafe { + insert_or_barrier_update( + Some(&value.life_guard), + Some(&mut self.start), + &mut self.end, + &mut self.metadata, + index32, + index, + BufferStateProvider::Direct { state }, + None, + ResourceMetadataProvider::Resource { epoch }, + &mut self.temp, + ) + }; + + strict_assert!(self.temp.len() <= 1); + + Some((value, self.temp.pop())) + } + + /// Sets the given state for all buffers in the given tracker. + /// + /// If a transition is needed to get the buffers into the needed state, + /// those transitions are stored within the tracker. A subsequent + /// call to [`Self::drain`] is needed to get those transitions. + /// + /// If the ID is higher than the length of internal vectors, + /// the vectors will be extended. A call to set_size is not needed. + pub fn set_from_tracker(&mut self, tracker: &Self) { + let incoming_size = tracker.start.len(); + if incoming_size > self.start.len() { + self.set_size(incoming_size); + } + + for index in tracker.metadata.owned_indices() { + self.tracker_assert_in_bounds(index); + tracker.tracker_assert_in_bounds(index); + unsafe { + insert_or_barrier_update( + None, + Some(&mut self.start), + &mut self.end, + &mut self.metadata, + index as u32, + index, + BufferStateProvider::Indirect { + state: &tracker.start, + }, + Some(BufferStateProvider::Indirect { + state: &tracker.end, + }), + ResourceMetadataProvider::Indirect { + metadata: &tracker.metadata, + }, + &mut self.temp, + ) + } + } + } + + /// Sets the given state for all buffers in the given UsageScope. 
+ /// + /// If a transition is needed to get the buffers into the needed state, + /// those transitions are stored within the tracker. A subsequent + /// call to [`Self::drain`] is needed to get those transitions. + /// + /// If the ID is higher than the length of internal vectors, + /// the vectors will be extended. A call to set_size is not needed. + pub fn set_from_usage_scope(&mut self, scope: &BufferUsageScope<A>) { + let incoming_size = scope.state.len(); + if incoming_size > self.start.len() { + self.set_size(incoming_size); + } + + for index in scope.metadata.owned_indices() { + self.tracker_assert_in_bounds(index); + scope.tracker_assert_in_bounds(index); + unsafe { + insert_or_barrier_update( + None, + Some(&mut self.start), + &mut self.end, + &mut self.metadata, + index as u32, + index, + BufferStateProvider::Indirect { + state: &scope.state, + }, + None, + ResourceMetadataProvider::Indirect { + metadata: &scope.metadata, + }, + &mut self.temp, + ) + } + } + } + + /// Iterates through all buffers in the given bind group and adopts + /// the state given for those buffers in the UsageScope. It also + /// removes all touched buffers from the usage scope. + /// + /// If a transition is needed to get the buffers into the needed state, + /// those transitions are stored within the tracker. A subsequent + /// call to [`Self::drain`] is needed to get those transitions. + /// + /// This is a really funky method used by Compute Passes to generate + /// barriers after a call to dispatch without needing to iterate + /// over all elements in the usage scope. We use the + /// given iterator of ids as a source of which IDs to look at. + /// All the IDs must have first been added to the usage scope. + /// + /// # Safety + /// + /// [`Self::set_size`] must be called with the maximum possible Buffer ID before this + /// method is called. 
+ pub unsafe fn set_and_remove_from_usage_scope_sparse( + &mut self, + scope: &mut BufferUsageScope<A>, + id_source: impl IntoIterator<Item = Valid<BufferId>>, + ) { + let incoming_size = scope.state.len(); + if incoming_size > self.start.len() { + self.set_size(incoming_size); + } + + for id in id_source { + let (index32, _, _) = id.0.unzip(); + let index = index32 as usize; + + scope.tracker_assert_in_bounds(index); + + if unsafe { !scope.metadata.contains_unchecked(index) } { + continue; + } + unsafe { + insert_or_barrier_update( + None, + Some(&mut self.start), + &mut self.end, + &mut self.metadata, + index as u32, + index, + BufferStateProvider::Indirect { + state: &scope.state, + }, + None, + ResourceMetadataProvider::Indirect { + metadata: &scope.metadata, + }, + &mut self.temp, + ) + }; + + unsafe { scope.metadata.remove(index) }; + } + } + + /// Removes the given resource from the tracker iff we have the last reference to the + /// resource and the epoch matches. + /// + /// Returns true if the resource was removed. + /// + /// If the ID is higher than the length of internal vectors, + /// false will be returned. + pub fn remove_abandoned(&mut self, id: Valid<BufferId>) -> bool { + let (index32, epoch, _) = id.0.unzip(); + let index = index32 as usize; + + if index > self.metadata.size() { + return false; + } + + self.tracker_assert_in_bounds(index); + + unsafe { + if self.metadata.contains_unchecked(index) { + let existing_epoch = self.metadata.get_epoch_unchecked(index); + let existing_ref_count = self.metadata.get_ref_count_unchecked(index); + + if existing_epoch == epoch && existing_ref_count.load() == 1 { + self.metadata.remove(index); + return true; + } + } + } + + false + } +} + +/// Source of Buffer State. +#[derive(Debug, Clone)] +enum BufferStateProvider<'a> { + /// Get a state that was provided directly. + Direct { state: BufferUses }, + /// Get a state from an an array of states. 
+ Indirect { state: &'a [BufferUses] }, +} +impl BufferStateProvider<'_> { + /// Gets the state from the provider, given a resource ID index. + /// + /// # Safety + /// + /// Index must be in bounds for the indirect source iff this is in the indirect state. + #[inline(always)] + unsafe fn get_state(&self, index: usize) -> BufferUses { + match *self { + BufferStateProvider::Direct { state } => state, + BufferStateProvider::Indirect { state } => { + strict_assert!(index < state.len()); + *unsafe { state.get_unchecked(index) } + } + } + } +} + +/// Does an insertion operation if the index isn't tracked +/// in the current metadata, otherwise merges the given state +/// with the current state. If the merging would cause +/// a conflict, returns that usage conflict. +/// +/// # Safety +/// +/// Indexes must be valid indexes into all arrays passed in +/// to this function, either directly or via metadata or provider structs. +#[inline(always)] +unsafe fn insert_or_merge<A: hub::HalApi>( + life_guard: Option<&LifeGuard>, + start_states: Option<&mut [BufferUses]>, + current_states: &mut [BufferUses], + resource_metadata: &mut ResourceMetadata<A>, + index32: u32, + index: usize, + state_provider: BufferStateProvider<'_>, + metadata_provider: ResourceMetadataProvider<'_, A>, +) -> Result<(), UsageConflict> { + let currently_owned = unsafe { resource_metadata.contains_unchecked(index) }; + + if !currently_owned { + unsafe { + insert( + life_guard, + start_states, + current_states, + resource_metadata, + index, + state_provider, + None, + metadata_provider, + ) + }; + return Ok(()); + } + + unsafe { + merge( + current_states, + index32, + index, + state_provider, + metadata_provider, + ) + } +} + +/// If the resource isn't tracked +/// - Inserts the given resource. +/// - Uses the `start_state_provider` to populate `start_states` +/// - Uses either `end_state_provider` or `start_state_provider` +/// to populate `current_states`. 
+/// If the resource is tracked +/// - Inserts barriers from the state in `current_states` +/// to the state provided by `start_state_provider`. +/// - Updates the `current_states` with either the state from +/// `end_state_provider` or `start_state_provider`. +/// +/// Any barriers are added to the barrier vector. +/// +/// # Safety +/// +/// Indexes must be valid indexes into all arrays passed in +/// to this function, either directly or via metadata or provider structs. +#[inline(always)] +unsafe fn insert_or_barrier_update<A: hub::HalApi>( + life_guard: Option<&LifeGuard>, + start_states: Option<&mut [BufferUses]>, + current_states: &mut [BufferUses], + resource_metadata: &mut ResourceMetadata<A>, + index32: u32, + index: usize, + start_state_provider: BufferStateProvider<'_>, + end_state_provider: Option<BufferStateProvider<'_>>, + metadata_provider: ResourceMetadataProvider<'_, A>, + barriers: &mut Vec<PendingTransition<BufferUses>>, +) { + let currently_owned = unsafe { resource_metadata.contains_unchecked(index) }; + + if !currently_owned { + unsafe { + insert( + life_guard, + start_states, + current_states, + resource_metadata, + index, + start_state_provider, + end_state_provider, + metadata_provider, + ) + }; + return; + } + + let update_state_provider = end_state_provider.unwrap_or_else(|| start_state_provider.clone()); + unsafe { + barrier( + current_states, + index32, + index, + start_state_provider, + barriers, + ) + }; + + unsafe { update(current_states, index, update_state_provider) }; +} + +#[inline(always)] +unsafe fn insert<A: hub::HalApi>( + life_guard: Option<&LifeGuard>, + start_states: Option<&mut [BufferUses]>, + current_states: &mut [BufferUses], + resource_metadata: &mut ResourceMetadata<A>, + index: usize, + start_state_provider: BufferStateProvider<'_>, + end_state_provider: Option<BufferStateProvider<'_>>, + metadata_provider: ResourceMetadataProvider<'_, A>, +) { + let new_start_state = unsafe { start_state_provider.get_state(index) 
}; + let new_end_state = + end_state_provider.map_or(new_start_state, |p| unsafe { p.get_state(index) }); + + // This should only ever happen with a wgpu bug, but let's just double + // check that resource states don't have any conflicts. + strict_assert_eq!(invalid_resource_state(new_start_state), false); + strict_assert_eq!(invalid_resource_state(new_end_state), false); + + log::trace!("\tbuf {index}: insert {new_start_state:?}..{new_end_state:?}"); + + unsafe { + if let Some(&mut ref mut start_state) = start_states { + *start_state.get_unchecked_mut(index) = new_start_state; + } + *current_states.get_unchecked_mut(index) = new_end_state; + + let (epoch, ref_count) = metadata_provider.get_own(life_guard, index); + resource_metadata.insert(index, epoch, ref_count); + } +} + +#[inline(always)] +unsafe fn merge<A: hub::HalApi>( + current_states: &mut [BufferUses], + index32: u32, + index: usize, + state_provider: BufferStateProvider<'_>, + metadata_provider: ResourceMetadataProvider<'_, A>, +) -> Result<(), UsageConflict> { + let current_state = unsafe { current_states.get_unchecked_mut(index) }; + let new_state = unsafe { state_provider.get_state(index) }; + + let merged_state = *current_state | new_state; + + if invalid_resource_state(merged_state) { + return Err(UsageConflict::from_buffer( + BufferId::zip( + index32, + unsafe { metadata_provider.get_epoch(index) }, + A::VARIANT, + ), + *current_state, + new_state, + )); + } + + log::trace!("\tbuf {index32}: merge {current_state:?} + {new_state:?}"); + + *current_state = merged_state; + + Ok(()) +} + +#[inline(always)] +unsafe fn barrier( + current_states: &mut [BufferUses], + index32: u32, + index: usize, + state_provider: BufferStateProvider<'_>, + barriers: &mut Vec<PendingTransition<BufferUses>>, +) { + let current_state = unsafe { *current_states.get_unchecked(index) }; + let new_state = unsafe { state_provider.get_state(index) }; + + if skip_barrier(current_state, new_state) { + return; + } + + 
barriers.push(PendingTransition { + id: index32, + selector: (), + usage: current_state..new_state, + }); + + log::trace!("\tbuf {index32}: transition {current_state:?} -> {new_state:?}"); +} + +#[inline(always)] +unsafe fn update( + current_states: &mut [BufferUses], + index: usize, + state_provider: BufferStateProvider<'_>, +) { + let current_state = unsafe { current_states.get_unchecked_mut(index) }; + let new_state = unsafe { state_provider.get_state(index) }; + + *current_state = new_state; +} diff --git a/third_party/rust/wgpu-core/src/track/metadata.rs b/third_party/rust/wgpu-core/src/track/metadata.rs new file mode 100644 index 0000000000..73da0d6c5d --- /dev/null +++ b/third_party/rust/wgpu-core/src/track/metadata.rs @@ -0,0 +1,269 @@ +//! The `ResourceMetadata` type. + +use crate::{ + hub, + id::{self, TypedId}, + Epoch, LifeGuard, RefCount, +}; +use bit_vec::BitVec; +use std::{borrow::Cow, marker::PhantomData, mem}; +use wgt::strict_assert; + +/// A set of resources, holding a [`RefCount`] and epoch for each member. +/// +/// Testing for membership is fast, and iterating over members is +/// reasonably fast in practice. Storage consumption is proportional +/// to the largest id index of any member, not to the number of +/// members, but a bit vector tracks occupancy, so iteration touches +/// only occupied elements. +#[derive(Debug)] +pub(super) struct ResourceMetadata<A: hub::HalApi> { + /// If the resource with index `i` is a member, `owned[i]` is `true`. + owned: BitVec<usize>, + + /// A vector parallel to `owned`, holding clones of members' `RefCount`s. + ref_counts: Vec<Option<RefCount>>, + + /// A vector parallel to `owned`, holding the epoch of each members' id. + epochs: Vec<Epoch>, + + /// This tells Rust that this type should be covariant with `A`. 
+ _phantom: PhantomData<A>, +} + +impl<A: hub::HalApi> ResourceMetadata<A> { + pub(super) fn new() -> Self { + Self { + owned: BitVec::default(), + ref_counts: Vec::new(), + epochs: Vec::new(), + + _phantom: PhantomData, + } + } + + /// Returns the number of indices we can accommodate. + pub(super) fn size(&self) -> usize { + self.owned.len() + } + + pub(super) fn set_size(&mut self, size: usize) { + self.ref_counts.resize(size, None); + self.epochs.resize(size, u32::MAX); + + resize_bitvec(&mut self.owned, size); + } + + /// Ensures a given index is in bounds for all arrays and does + /// sanity checks of the presence of a refcount. + /// + /// In release mode this function is completely empty and is removed. + #[cfg_attr(not(feature = "strict_asserts"), allow(unused_variables))] + pub(super) fn tracker_assert_in_bounds(&self, index: usize) { + strict_assert!(index < self.owned.len()); + strict_assert!(index < self.ref_counts.len()); + strict_assert!(index < self.epochs.len()); + + strict_assert!(if self.contains(index) { + self.ref_counts[index].is_some() + } else { + true + }); + } + + /// Returns true if the tracker owns no resources. + /// + /// This is a O(n) operation. + pub(super) fn is_empty(&self) -> bool { + !self.owned.any() + } + + /// Returns true if the set contains the resource with the given index. + pub(super) fn contains(&self, index: usize) -> bool { + self.owned[index] + } + + /// Returns true if the set contains the resource with the given index. + /// + /// # Safety + /// + /// The given `index` must be in bounds for this `ResourceMetadata`'s + /// existing tables. See `tracker_assert_in_bounds`. + #[inline(always)] + pub(super) unsafe fn contains_unchecked(&self, index: usize) -> bool { + unsafe { self.owned.get(index).unwrap_unchecked() } + } + + /// Insert a resource into the set. + /// + /// Add the resource with the given index, epoch, and reference count to the + /// set. 
+ /// + /// # Safety + /// + /// The given `index` must be in bounds for this `ResourceMetadata`'s + /// existing tables. See `tracker_assert_in_bounds`. + #[inline(always)] + pub(super) unsafe fn insert(&mut self, index: usize, epoch: Epoch, ref_count: RefCount) { + self.owned.set(index, true); + unsafe { + *self.epochs.get_unchecked_mut(index) = epoch; + *self.ref_counts.get_unchecked_mut(index) = Some(ref_count); + } + } + + /// Get the [`RefCount`] of the resource with the given index. + /// + /// # Safety + /// + /// The given `index` must be in bounds for this `ResourceMetadata`'s + /// existing tables. See `tracker_assert_in_bounds`. + #[inline(always)] + pub(super) unsafe fn get_ref_count_unchecked(&self, index: usize) -> &RefCount { + unsafe { + self.ref_counts + .get_unchecked(index) + .as_ref() + .unwrap_unchecked() + } + } + + /// Get the [`Epoch`] of the id of the resource with the given index. + /// + /// # Safety + /// + /// The given `index` must be in bounds for this `ResourceMetadata`'s + /// existing tables. See `tracker_assert_in_bounds`. + #[inline(always)] + pub(super) unsafe fn get_epoch_unchecked(&self, index: usize) -> Epoch { + unsafe { *self.epochs.get_unchecked(index) } + } + + /// Returns an iterator over the ids for all resources owned by `self`. + pub(super) fn owned_ids<Id: TypedId>(&self) -> impl Iterator<Item = id::Valid<Id>> + '_ { + if !self.owned.is_empty() { + self.tracker_assert_in_bounds(self.owned.len() - 1) + }; + iterate_bitvec_indices(&self.owned).map(move |index| { + let epoch = unsafe { *self.epochs.get_unchecked(index) }; + id::Valid(Id::zip(index as u32, epoch, A::VARIANT)) + }) + } + + /// Returns an iterator over the indices of all resources owned by `self`. 
+ pub(super) fn owned_indices(&self) -> impl Iterator<Item = usize> + '_ { + if !self.owned.is_empty() { + self.tracker_assert_in_bounds(self.owned.len() - 1) + }; + iterate_bitvec_indices(&self.owned) + } + + /// Remove the resource with the given index from the set. + /// + /// Clears the stored `RefCount`, resets the stored epoch to `u32::MAX` + /// and marks the index as not owned. + /// + /// # Safety + /// + /// The given `index` must be in bounds for this `ResourceMetadata`'s + /// existing tables. See `tracker_assert_in_bounds`. + pub(super) unsafe fn remove(&mut self, index: usize) { + unsafe { + *self.ref_counts.get_unchecked_mut(index) = None; + *self.epochs.get_unchecked_mut(index) = u32::MAX; + } + self.owned.set(index, false); + } +} + +/// A source of resource metadata. +/// +/// This is used to abstract over the various places +/// trackers can get new resource metadata from. +pub(super) enum ResourceMetadataProvider<'a, A: hub::HalApi> { + /// Comes directly from explicit values. + Direct { + epoch: Epoch, + ref_count: Cow<'a, RefCount>, + }, + /// Comes from another metadata tracker. + Indirect { metadata: &'a ResourceMetadata<A> }, + /// The epoch is given directly, but the life count comes from the resource itself. + Resource { epoch: Epoch }, +} +impl<A: hub::HalApi> ResourceMetadataProvider<'_, A> { + /// Get the epoch and an owned refcount from this. + /// + /// # Safety + /// + /// - The index must be in bounds of the metadata tracker if this uses an indirect source. + /// - life_guard must be Some if this uses a Resource source. 
+ #[inline(always)] + pub(super) unsafe fn get_own( + self, + life_guard: Option<&LifeGuard>, + index: usize, + ) -> (Epoch, RefCount) { + match self { + ResourceMetadataProvider::Direct { epoch, ref_count } => { + (epoch, ref_count.into_owned()) + } + ResourceMetadataProvider::Indirect { metadata } => { + metadata.tracker_assert_in_bounds(index); + (unsafe { *metadata.epochs.get_unchecked(index) }, { + let ref_count = unsafe { metadata.ref_counts.get_unchecked(index) }; + unsafe { ref_count.clone().unwrap_unchecked() } + }) + } + ResourceMetadataProvider::Resource { epoch } => { + strict_assert!(life_guard.is_some()); + (epoch, unsafe { life_guard.unwrap_unchecked() }.add_ref()) + } + } + } + /// Get the epoch from this. + /// + /// # Safety + /// + /// - The index must be in bounds of the metadata tracker if this uses an indirect source. + #[inline(always)] + pub(super) unsafe fn get_epoch(self, index: usize) -> Epoch { + match self { + ResourceMetadataProvider::Direct { epoch, .. } + | ResourceMetadataProvider::Resource { epoch, .. } => epoch, + ResourceMetadataProvider::Indirect { metadata } => { + metadata.tracker_assert_in_bounds(index); + unsafe { *metadata.epochs.get_unchecked(index) } + } + } + } +} + +/// Resizes the given bitvec to the given size. I'm not sure why this is hard to do but it is. +fn resize_bitvec<B: bit_vec::BitBlock>(vec: &mut BitVec<B>, size: usize) { + let owned_size_to_grow = size.checked_sub(vec.len()); + if let Some(delta) = owned_size_to_grow { + if delta != 0 { + vec.grow(delta, false); + } + } else { + vec.truncate(size); + } +} + +/// Produces an iterator that yields the indexes of all bits that are set in the bitvec. +/// +/// Will skip entire usize's worth of bits if they are all false. 
+fn iterate_bitvec_indices(ownership: &BitVec<usize>) -> impl Iterator<Item = usize> + '_ { + const BITS_PER_BLOCK: usize = mem::size_of::<usize>() * 8; + + let size = ownership.len(); + + ownership + .blocks() + .enumerate() + .filter(|&(_, word)| word != 0) + .flat_map(move |(word_index, mut word)| { + let bit_start = word_index * BITS_PER_BLOCK; + let bit_end = (bit_start + BITS_PER_BLOCK).min(size); + + (bit_start..bit_end).filter(move |_| { + let active = word & 0b1 != 0; + word >>= 1; + + active + }) + }) +} diff --git a/third_party/rust/wgpu-core/src/track/mod.rs b/third_party/rust/wgpu-core/src/track/mod.rs new file mode 100644 index 0000000000..c5bfa68ebb --- /dev/null +++ b/third_party/rust/wgpu-core/src/track/mod.rs @@ -0,0 +1,610 @@ +/*! Resource State and Lifetime Trackers + +These structures are responsible for keeping track of resource state, +generating barriers where needed, and making sure resources are kept +alive until the trackers die. + +## General Architecture + +Tracking is some of the hottest code in the entire codebase, so the trackers +are designed to be as cache efficient as possible. They store resource state +in flat vectors, storing metadata SOA style, one vector per type of metadata. + +A lot of the tracker code is deeply unsafe, using unchecked accesses all over +to make performance as good as possible. However, for all unsafe accesses, there +is a corresponding debug assert that checks if that access is valid. This helps +get bugs caught fast, while still letting users not need to pay for the bounds +checks. + +In wgpu, resource IDs are allocated and re-used, so will always be as low +as reasonably possible. This allows us to use the ID as an index into an array. + +## Statefulness + +There are two main types of trackers, stateful and stateless. + +Stateful trackers are for buffers and textures. They both have +resource state attached to them which needs to be used to generate +automatic synchronization. 
Because of the different requirements of +buffers and textures, they have two separate tracking structures. + +Stateless trackers only store metadata and own the given resource. + +## Use Case + +Within each type of tracker, the trackers are further split into 3 different +use cases, Bind Group, Usage Scope, and a full Tracker. + +Bind Group trackers are just a list of different resources, their refcount, +and how they are used. Textures are used via a selector and a usage type. +Buffers by just a usage type. Stateless resources don't have a usage type. + +Usage Scope trackers are only for stateful resources. These trackers represent +a single [`UsageScope`] in the spec. When a use is added to a usage scope, +it is merged with all other uses of that resource in that scope. If there +is a usage conflict, merging will fail and an error will be reported. + +Full trackers represent a before and after state of a resource. These +are used for tracking on the device and on command buffers. The before +state represents the state the resource is first used as in the command buffer, +the after state is the state the command buffer leaves the resource in. +These double ended buffers can then be used to generate the needed transitions +between command buffers. + +## Dense Datastructure with Sparse Data + +This tracking system is based on having completely dense data, but trackers do +not always contain every resource. Some resources (or even most resources) go +unused in any given command buffer. So to help speed up the process of iterating +through possibly thousands of resources, we use a bit vector to represent if +a resource is in the buffer or not. This allows us extremely efficient memory +utilization, as well as being able to bail out of whole blocks of 32-64 resources +with a single usize comparison with zero. In practice this means that merging +partially resident buffers is extremely quick. 
+ +The main advantage of this dense datastructure is that we can do merging +of trackers in an extremely efficient fashion that results in us doing linear +scans down a couple of buffers. CPUs and their caches absolutely eat this up. + +## Stateful Resource Operations + +All operations on stateful trackers boil down to one of four operations: +- `insert(tracker, new_state)` adds a resource with a given state to the tracker + for the first time. +- `merge(tracker, new_state)` merges this new state with the previous state, checking + for usage conflicts. +- `barrier(tracker, new_state)` compares the given state to the existing state and + generates the needed barriers. +- `update(tracker, new_state)` takes the given new state and overrides the old state. + +This allows us to compose the operations to form the various kinds of tracker merges +that need to happen in the codebase. For each resource in the given merger, the following +operation applies: + +```text +UsageScope <- Resource = insert(scope, usage) OR merge(scope, usage) +UsageScope <- UsageScope = insert(scope, scope) OR merge(scope, scope) +CommandBuffer <- UsageScope = insert(buffer.start, buffer.end, scope) + OR barrier(buffer.end, scope) + update(buffer.end, scope) +Device <- CommandBuffer = insert(device.start, device.end, buffer.start, buffer.end) + OR barrier(device.end, buffer.start) + update(device.end, buffer.end) +``` + +[`UsageScope`]: https://gpuweb.github.io/gpuweb/#programming-model-synchronization +*/ + +mod buffer; +mod metadata; +mod range; +mod stateless; +mod texture; + +use crate::{ + binding_model, command, conv, hub, + id::{self, TypedId}, + pipeline, resource, +}; + +use std::{fmt, num::NonZeroU32, ops}; +use thiserror::Error; + +pub(crate) use buffer::{BufferBindGroupState, BufferTracker, BufferUsageScope}; +use metadata::{ResourceMetadata, ResourceMetadataProvider}; +pub(crate) use stateless::{StatelessBindGroupSate, StatelessTracker}; +pub(crate) use texture::{ + 
TextureBindGroupState, TextureSelector, TextureTracker, TextureUsageScope, +}; +use wgt::strict_assert_ne; + +/// A structure containing all the information about a particular resource +/// transition. User code should be able to generate a pipeline barrier +/// based on the contents. +#[derive(Debug, PartialEq)] +pub(crate) struct PendingTransition<S: ResourceUses> { + pub id: u32, + pub selector: S::Selector, + pub usage: ops::Range<S>, +} + +impl PendingTransition<hal::BufferUses> { + /// Produce the hal barrier corresponding to the transition. + pub fn into_hal<'a, A: hal::Api>( + self, + buf: &'a resource::Buffer<A>, + ) -> hal::BufferBarrier<'a, A> { + let buffer = buf.raw.as_ref().expect("Buffer is destroyed"); + hal::BufferBarrier { + buffer, + usage: self.usage, + } + } +} + +impl PendingTransition<hal::TextureUses> { + /// Produce the hal barrier corresponding to the transition. + pub fn into_hal<'a, A: hal::Api>( + self, + tex: &'a resource::Texture<A>, + ) -> hal::TextureBarrier<'a, A> { + let texture = tex.inner.as_raw().expect("Texture is destroyed"); + + // These showing up in a barrier is always a bug + strict_assert_ne!(self.usage.start, hal::TextureUses::UNKNOWN); + strict_assert_ne!(self.usage.end, hal::TextureUses::UNKNOWN); + + let mip_count = self.selector.mips.end - self.selector.mips.start; + strict_assert_ne!(mip_count, 0); + let layer_count = self.selector.layers.end - self.selector.layers.start; + strict_assert_ne!(layer_count, 0); + + hal::TextureBarrier { + texture, + range: wgt::ImageSubresourceRange { + aspect: wgt::TextureAspect::All, + base_mip_level: self.selector.mips.start, + mip_level_count: unsafe { Some(NonZeroU32::new_unchecked(mip_count)) }, + base_array_layer: self.selector.layers.start, + array_layer_count: unsafe { Some(NonZeroU32::new_unchecked(layer_count)) }, + }, + usage: self.usage, + } + } +} + +/// The uses that a resource or subresource can be in. 
+pub(crate) trait ResourceUses: + fmt::Debug + ops::BitAnd<Output = Self> + ops::BitOr<Output = Self> + PartialEq + Sized + Copy +{ + /// All flags that are exclusive. + const EXCLUSIVE: Self; + + /// The relevant resource ID type. + type Id: Copy + fmt::Debug + TypedId; + /// The selector used by this resource. + type Selector: fmt::Debug; + + /// Turn the resource into a pile of bits. + fn bits(self) -> u16; + /// Returns true if all the uses are ordered. + fn all_ordered(self) -> bool; + /// Returns true if any of the uses are exclusive. + fn any_exclusive(self) -> bool; +} + +/// Returns true if the given state violates the usage scope rule +/// of any(inclusive) XOR one(exclusive) +fn invalid_resource_state<T: ResourceUses>(state: T) -> bool { + // Is power of two also means "is one bit set". We check for this as if + // we're in any exclusive state, we must only be in a single state. + state.any_exclusive() && !conv::is_power_of_two_u16(state.bits()) +} + +/// Returns true if the transition from one state to another does not require +/// a barrier. 
+fn skip_barrier<T: ResourceUses>(old_state: T, new_state: T) -> bool { + // If the state didn't change and all the usages are ordered, the hardware + // will guarantee the order of accesses, so we do not need to issue a barrier at all + old_state == new_state && old_state.all_ordered() +} + +#[derive(Clone, Debug, Error, Eq, PartialEq)] +pub enum UsageConflict { + #[error("Attempted to use invalid buffer")] + BufferInvalid { id: id::BufferId }, + #[error("Attempted to use invalid texture")] + TextureInvalid { id: id::TextureId }, + #[error("Attempted to use buffer with {invalid_use}.")] + Buffer { + id: id::BufferId, + invalid_use: InvalidUse<hal::BufferUses>, + }, + #[error("Attempted to use a texture (mips {mip_levels:?} layers {array_layers:?}) with {invalid_use}.")] + Texture { + id: id::TextureId, + mip_levels: ops::Range<u32>, + array_layers: ops::Range<u32>, + invalid_use: InvalidUse<hal::TextureUses>, + }, +} + +impl UsageConflict { + fn from_buffer( + id: id::BufferId, + current_state: hal::BufferUses, + new_state: hal::BufferUses, + ) -> Self { + Self::Buffer { + id, + invalid_use: InvalidUse { + current_state, + new_state, + }, + } + } + + fn from_texture( + id: id::TextureId, + selector: TextureSelector, + current_state: hal::TextureUses, + new_state: hal::TextureUses, + ) -> Self { + Self::Texture { + id, + mip_levels: selector.mips, + array_layers: selector.layers, + invalid_use: InvalidUse { + current_state, + new_state, + }, + } + } +} + +impl crate::error::PrettyError for UsageConflict { + fn fmt_pretty(&self, fmt: &mut crate::error::ErrorFormatter) { + fmt.error(self); + match *self { + Self::BufferInvalid { id } => { + fmt.buffer_label(&id); + } + Self::TextureInvalid { id } => { + fmt.texture_label(&id); + } + Self::Buffer { id, .. } => { + fmt.buffer_label(&id); + } + Self::Texture { id, .. } => { + fmt.texture_label(&id); + } + } + } +} + +/// Pretty print helper that shows helpful descriptions of a conflicting usage. 
+#[derive(Clone, Debug, Eq, PartialEq)] +pub struct InvalidUse<T> { + current_state: T, + new_state: T, +} + +impl<T: ResourceUses> fmt::Display for InvalidUse<T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let current = self.current_state; + let new = self.new_state; + + let current_exclusive = current & T::EXCLUSIVE; + let new_exclusive = new & T::EXCLUSIVE; + + let exclusive = current_exclusive | new_exclusive; + + // The text starts with "Attempted to use <resource> with {self}" + write!( + f, + "conflicting usages. Current usage {current:?} and new usage {new:?}. \ + {exclusive:?} is an exclusive usage and cannot be used with any other \ + usages within the usage scope (renderpass or compute dispatch)" + ) + } +} + +/// All the usages that a bind group contains. The uses are not deduplicated in any way +/// and may include conflicting uses. This is fully compliant with the WebGPU spec. +/// +/// All bind group states are sorted by their ID so that when adding to a tracker, +/// they are added in the most efficient order possible (ascending order). +pub(crate) struct BindGroupStates<A: hub::HalApi> { + pub buffers: BufferBindGroupState<A>, + pub textures: TextureBindGroupState<A>, + pub views: StatelessBindGroupSate<resource::TextureView<A>, id::TextureViewId>, + pub samplers: StatelessBindGroupSate<resource::Sampler<A>, id::SamplerId>, +} + +impl<A: hub::HalApi> BindGroupStates<A> { + pub fn new() -> Self { + Self { + buffers: BufferBindGroupState::new(), + textures: TextureBindGroupState::new(), + views: StatelessBindGroupSate::new(), + samplers: StatelessBindGroupSate::new(), + } + } + + /// Optimize the bind group states by sorting them by ID. + /// + /// When this list of states is merged into a tracker, the memory + /// accesses will be in a constant ascending order. 
+ pub fn optimize(&mut self) { + self.buffers.optimize(); + self.textures.optimize(); + self.views.optimize(); + self.samplers.optimize(); + } +} + +/// This is a render bundle specific usage scope. It includes stateless resources +/// that are not normally included in a usage scope, but are used by render bundles +/// and need to be owned by the render bundles. +pub(crate) struct RenderBundleScope<A: hub::HalApi> { + pub buffers: BufferUsageScope<A>, + pub textures: TextureUsageScope<A>, + // Don't need to track views and samplers, they are never used directly, only by bind groups. + pub bind_groups: StatelessTracker<A, binding_model::BindGroup<A>, id::BindGroupId>, + pub render_pipelines: StatelessTracker<A, pipeline::RenderPipeline<A>, id::RenderPipelineId>, + pub query_sets: StatelessTracker<A, resource::QuerySet<A>, id::QuerySetId>, +} + +impl<A: hub::HalApi> RenderBundleScope<A> { + /// Create the render bundle scope and pull the maximum IDs from the hubs. + pub fn new( + buffers: &hub::Storage<resource::Buffer<A>, id::BufferId>, + textures: &hub::Storage<resource::Texture<A>, id::TextureId>, + bind_groups: &hub::Storage<binding_model::BindGroup<A>, id::BindGroupId>, + render_pipelines: &hub::Storage<pipeline::RenderPipeline<A>, id::RenderPipelineId>, + query_sets: &hub::Storage<resource::QuerySet<A>, id::QuerySetId>, + ) -> Self { + let mut value = Self { + buffers: BufferUsageScope::new(), + textures: TextureUsageScope::new(), + bind_groups: StatelessTracker::new(), + render_pipelines: StatelessTracker::new(), + query_sets: StatelessTracker::new(), + }; + + value.buffers.set_size(buffers.len()); + value.textures.set_size(textures.len()); + value.bind_groups.set_size(bind_groups.len()); + value.render_pipelines.set_size(render_pipelines.len()); + value.query_sets.set_size(query_sets.len()); + + value + } + + /// Merge the inner contents of a bind group into the render bundle tracker. 
+ /// + /// Only stateful things are merged in here, all other resources are owned + /// indirectly by the bind group. + /// + /// # Safety + /// + /// The maximum ID given by each bind group resource must be less than the + /// length of the storage given at the call to `new`. + pub unsafe fn merge_bind_group( + &mut self, + textures: &hub::Storage<resource::Texture<A>, id::TextureId>, + bind_group: &BindGroupStates<A>, + ) -> Result<(), UsageConflict> { + unsafe { self.buffers.merge_bind_group(&bind_group.buffers)? }; + unsafe { + self.textures + .merge_bind_group(textures, &bind_group.textures)? + }; + + Ok(()) + } +} + +/// A usage scope tracker. Only needs to store stateful resources as stateless +/// resources cannot possibly have a usage conflict. +#[derive(Debug)] +pub(crate) struct UsageScope<A: hub::HalApi> { + pub buffers: BufferUsageScope<A>, + pub textures: TextureUsageScope<A>, +} + +impl<A: hub::HalApi> UsageScope<A> { + /// Create the usage scope and pull the maximum IDs from the hubs. + pub fn new( + buffers: &hub::Storage<resource::Buffer<A>, id::BufferId>, + textures: &hub::Storage<resource::Texture<A>, id::TextureId>, + ) -> Self { + let mut value = Self { + buffers: BufferUsageScope::new(), + textures: TextureUsageScope::new(), + }; + + value.buffers.set_size(buffers.len()); + value.textures.set_size(textures.len()); + + value + } + + /// Merge the inner contents of a bind group into the usage scope. + /// + /// Only stateful things are merged in here, all other resources are owned + /// indirectly by the bind group. + /// + /// # Safety + /// + /// The maximum ID given by each bind group resource must be less than the + /// length of the storage given at the call to `new`. 
+ pub unsafe fn merge_bind_group( + &mut self, + textures: &hub::Storage<resource::Texture<A>, id::TextureId>, + bind_group: &BindGroupStates<A>, + ) -> Result<(), UsageConflict> { + unsafe { + self.buffers.merge_bind_group(&bind_group.buffers)?; + self.textures + .merge_bind_group(textures, &bind_group.textures)?; + } + + Ok(()) + } + + /// Merge the inner contents of a render bundle into the usage scope. + /// + /// Only stateful things are merged in here, all other resources are owned + /// indirectly by a bind group or are merged directly into the command buffer tracker. + /// + /// # Safety + /// + /// The maximum ID given by each render bundle resource must be less than the + /// length of the storage given at the call to `new`. + pub unsafe fn merge_render_bundle( + &mut self, + textures: &hub::Storage<resource::Texture<A>, id::TextureId>, + render_bundle: &RenderBundleScope<A>, + ) -> Result<(), UsageConflict> { + self.buffers.merge_usage_scope(&render_bundle.buffers)?; + self.textures + .merge_usage_scope(textures, &render_bundle.textures)?; + + Ok(()) + } +} + +/// A full double sided tracker used by CommandBuffers and the Device. 
+pub(crate) struct Tracker<A: hub::HalApi> { + pub buffers: BufferTracker<A>, + pub textures: TextureTracker<A>, + pub views: StatelessTracker<A, resource::TextureView<A>, id::TextureViewId>, + pub samplers: StatelessTracker<A, resource::Sampler<A>, id::SamplerId>, + pub bind_groups: StatelessTracker<A, binding_model::BindGroup<A>, id::BindGroupId>, + pub compute_pipelines: StatelessTracker<A, pipeline::ComputePipeline<A>, id::ComputePipelineId>, + pub render_pipelines: StatelessTracker<A, pipeline::RenderPipeline<A>, id::RenderPipelineId>, + pub bundles: StatelessTracker<A, command::RenderBundle<A>, id::RenderBundleId>, + pub query_sets: StatelessTracker<A, resource::QuerySet<A>, id::QuerySetId>, +} + +impl<A: hub::HalApi> Tracker<A> { + pub fn new() -> Self { + Self { + buffers: BufferTracker::new(), + textures: TextureTracker::new(), + views: StatelessTracker::new(), + samplers: StatelessTracker::new(), + bind_groups: StatelessTracker::new(), + compute_pipelines: StatelessTracker::new(), + render_pipelines: StatelessTracker::new(), + bundles: StatelessTracker::new(), + query_sets: StatelessTracker::new(), + } + } + + /// Pull the maximum IDs from the hubs. 
+ pub fn set_size( + &mut self, + buffers: Option<&hub::Storage<resource::Buffer<A>, id::BufferId>>, + textures: Option<&hub::Storage<resource::Texture<A>, id::TextureId>>, + views: Option<&hub::Storage<resource::TextureView<A>, id::TextureViewId>>, + samplers: Option<&hub::Storage<resource::Sampler<A>, id::SamplerId>>, + bind_groups: Option<&hub::Storage<binding_model::BindGroup<A>, id::BindGroupId>>, + compute_pipelines: Option< + &hub::Storage<pipeline::ComputePipeline<A>, id::ComputePipelineId>, + >, + render_pipelines: Option<&hub::Storage<pipeline::RenderPipeline<A>, id::RenderPipelineId>>, + bundles: Option<&hub::Storage<command::RenderBundle<A>, id::RenderBundleId>>, + query_sets: Option<&hub::Storage<resource::QuerySet<A>, id::QuerySetId>>, + ) { + if let Some(buffers) = buffers { + self.buffers.set_size(buffers.len()); + }; + if let Some(textures) = textures { + self.textures.set_size(textures.len()); + }; + if let Some(views) = views { + self.views.set_size(views.len()); + }; + if let Some(samplers) = samplers { + self.samplers.set_size(samplers.len()); + }; + if let Some(bind_groups) = bind_groups { + self.bind_groups.set_size(bind_groups.len()); + }; + if let Some(compute_pipelines) = compute_pipelines { + self.compute_pipelines.set_size(compute_pipelines.len()); + } + if let Some(render_pipelines) = render_pipelines { + self.render_pipelines.set_size(render_pipelines.len()); + }; + if let Some(bundles) = bundles { + self.bundles.set_size(bundles.len()); + }; + if let Some(query_sets) = query_sets { + self.query_sets.set_size(query_sets.len()); + }; + } + + /// Iterates through all resources in the given bind group and adopts + /// the state given for those resources in the UsageScope. It also + /// removes all touched resources from the usage scope. + /// + /// If a transition is needed to get the resources into the needed + /// state, those transitions are stored within the tracker. 
A + /// subsequent call to [`BufferTracker::drain`] or + /// [`TextureTracker::drain`] is needed to get those transitions. + /// + /// This is a really funky method used by Compute Passes to generate + /// barriers after a call to dispatch without needing to iterate + /// over all elements in the usage scope. We use the + /// bind group as a source of which IDs to look at. The bind groups + /// must have first been added to the usage scope. + /// + /// Only stateful things are merged in here, all other resources are owned + /// indirectly by the bind group. + /// + /// # Safety + /// + /// The maximum ID given by each bind group resource must be less than the + /// value given to `set_size` + pub unsafe fn set_and_remove_from_usage_scope_sparse( + &mut self, + textures: &hub::Storage<resource::Texture<A>, id::TextureId>, + scope: &mut UsageScope<A>, + bind_group: &BindGroupStates<A>, + ) { + unsafe { + self.buffers.set_and_remove_from_usage_scope_sparse( + &mut scope.buffers, + bind_group.buffers.used(), + ) + }; + unsafe { + self.textures.set_and_remove_from_usage_scope_sparse( + textures, + &mut scope.textures, + &bind_group.textures, + ) + }; + } + + /// Tracks the stateless resources from the given render bundle. It is expected + /// that the stateful resources will get merged into a usage scope first. 
+ /// + /// # Safety + /// + /// The maximum ID given by each render bundle resource must be less than the + /// value given to `set_size` + pub unsafe fn add_from_render_bundle( + &mut self, + render_bundle: &RenderBundleScope<A>, + ) -> Result<(), UsageConflict> { + self.bind_groups + .add_from_tracker(&render_bundle.bind_groups); + self.render_pipelines + .add_from_tracker(&render_bundle.render_pipelines); + self.query_sets.add_from_tracker(&render_bundle.query_sets); + + Ok(()) + } +} diff --git a/third_party/rust/wgpu-core/src/track/range.rs b/third_party/rust/wgpu-core/src/track/range.rs new file mode 100644 index 0000000000..12c527fb2a --- /dev/null +++ b/third_party/rust/wgpu-core/src/track/range.rs @@ -0,0 +1,206 @@ +//Note: this could be the only place where we need `SmallVec`. +//TODO: consider getting rid of it. +use smallvec::SmallVec; + +use std::{fmt::Debug, iter, ops::Range}; + +/// Structure that keeps track of an I -> T mapping, +/// optimized for a case where keys of the same values +/// are often grouped together linearly. +#[derive(Clone, Debug, PartialEq)] +pub(crate) struct RangedStates<I, T> { + /// List of ranges, each associated with a single value. + /// Ranges of keys have to be non-intersecting and ordered. + ranges: SmallVec<[(Range<I>, T); 1]>, +} + +impl<I: Copy + Ord, T: Copy + PartialEq> RangedStates<I, T> { + pub fn from_range(range: Range<I>, value: T) -> Self { + Self { + ranges: iter::once((range, value)).collect(), + } + } + + /// Construct a new instance from a slice of ranges. + #[cfg(test)] + pub fn from_slice(values: &[(Range<I>, T)]) -> Self { + Self { + ranges: values.iter().cloned().collect(), + } + } + + pub fn iter(&self) -> impl Iterator<Item = &(Range<I>, T)> + Clone { + self.ranges.iter() + } + + pub fn iter_mut(&mut self) -> impl Iterator<Item = &mut (Range<I>, T)> { + self.ranges.iter_mut() + } + + /// Check that all the ranges are non-intersecting and ordered. + /// Panics otherwise. 
+ #[cfg(test)] + fn check_sanity(&self) { + for a in self.ranges.iter() { + assert!(a.0.start < a.0.end); + } + for (a, b) in self.ranges.iter().zip(self.ranges[1..].iter()) { + assert!(a.0.end <= b.0.start); + } + } + + /// Merge the neighboring ranges together, where possible. + pub fn coalesce(&mut self) { + let mut num_removed = 0; + let mut iter = self.ranges.iter_mut(); + let mut cur = match iter.next() { + Some(elem) => elem, + None => return, + }; + for next in iter { + if cur.0.end == next.0.start && cur.1 == next.1 { + num_removed += 1; + cur.0.end = next.0.end; + next.0.end = next.0.start; + } else { + cur = next; + } + } + if num_removed != 0 { + self.ranges.retain(|pair| pair.0.start != pair.0.end); + } + } + + pub fn iter_filter<'a>( + &'a self, + range: &'a Range<I>, + ) -> impl Iterator<Item = (Range<I>, &T)> + 'a { + self.ranges + .iter() + .filter(move |&&(ref inner, ..)| inner.end > range.start && inner.start < range.end) + .map(move |&(ref inner, ref v)| { + let new_range = inner.start.max(range.start)..inner.end.min(range.end); + + (new_range, v) + }) + } + + /// Split the storage ranges in such a way that there is a linear subset of + /// them occupying exactly `index` range, which is returned mutably. + /// + /// Gaps in the ranges are filled with `default` value. + pub fn isolate(&mut self, index: &Range<I>, default: T) -> &mut [(Range<I>, T)] { + //TODO: implement this in 2 passes: + // 1. scan the ranges to figure out how many extra ones need to be inserted + // 2. 
go through the ranges by moving them them to the right and inserting the missing ones + + let mut start_pos = match self.ranges.iter().position(|pair| pair.0.end > index.start) { + Some(pos) => pos, + None => { + let pos = self.ranges.len(); + self.ranges.push((index.clone(), default)); + return &mut self.ranges[pos..]; + } + }; + + { + let (range, value) = self.ranges[start_pos].clone(); + if range.start < index.start { + self.ranges[start_pos].0.start = index.start; + self.ranges + .insert(start_pos, (range.start..index.start, value)); + start_pos += 1; + } + } + let mut pos = start_pos; + let mut range_pos = index.start; + loop { + let (range, value) = self.ranges[pos].clone(); + if range.start >= index.end { + self.ranges.insert(pos, (range_pos..index.end, default)); + pos += 1; + break; + } + if range.start > range_pos { + self.ranges.insert(pos, (range_pos..range.start, default)); + pos += 1; + range_pos = range.start; + } + if range.end >= index.end { + if range.end != index.end { + self.ranges[pos].0.start = index.end; + self.ranges.insert(pos, (range_pos..index.end, value)); + } + pos += 1; + break; + } + pos += 1; + range_pos = range.end; + if pos == self.ranges.len() { + self.ranges.push((range_pos..index.end, default)); + pos += 1; + break; + } + } + + &mut self.ranges[start_pos..pos] + } + + /// Helper method for isolation that checks the sanity of the results. 
+ #[cfg(test)] + pub fn sanely_isolated(&self, index: Range<I>, default: T) -> Vec<(Range<I>, T)> { + let mut clone = self.clone(); + let result = clone.isolate(&index, default).to_vec(); + clone.check_sanity(); + result + } +} + +#[cfg(test)] +mod test { + //TODO: randomized/fuzzy testing + use super::RangedStates; + + #[test] + fn sane_good() { + let rs = RangedStates::from_slice(&[(1..4, 9u8), (4..5, 9)]); + rs.check_sanity(); + } + + #[test] + #[should_panic] + fn sane_empty() { + let rs = RangedStates::from_slice(&[(1..4, 9u8), (5..5, 9)]); + rs.check_sanity(); + } + + #[test] + #[should_panic] + fn sane_intersect() { + let rs = RangedStates::from_slice(&[(1..4, 9u8), (3..5, 9)]); + rs.check_sanity(); + } + + #[test] + fn coalesce() { + let mut rs = RangedStates::from_slice(&[(1..4, 9u8), (4..5, 9), (5..7, 1), (8..9, 1)]); + rs.coalesce(); + rs.check_sanity(); + assert_eq!(rs.ranges.as_slice(), &[(1..5, 9), (5..7, 1), (8..9, 1),]); + } + + #[test] + fn isolate() { + let rs = RangedStates::from_slice(&[(1..4, 9u8), (4..5, 9), (5..7, 1), (8..9, 1)]); + assert_eq!(&rs.sanely_isolated(4..5, 0), &[(4..5, 9u8),]); + assert_eq!( + &rs.sanely_isolated(0..6, 0), + &[(0..1, 0), (1..4, 9u8), (4..5, 9), (5..6, 1),] + ); + assert_eq!(&rs.sanely_isolated(8..10, 1), &[(8..9, 1), (9..10, 1),]); + assert_eq!( + &rs.sanely_isolated(6..9, 0), + &[(6..7, 1), (7..8, 0), (8..9, 1),] + ); + } +} diff --git a/third_party/rust/wgpu-core/src/track/stateless.rs b/third_party/rust/wgpu-core/src/track/stateless.rs new file mode 100644 index 0000000000..1d0fd5997a --- /dev/null +++ b/third_party/rust/wgpu-core/src/track/stateless.rs @@ -0,0 +1,194 @@ +/*! Stateless Trackers + * + * Stateless trackers don't have any state, so make no + * distinction between a usage scope and a full tracker. +!*/ + +use std::marker::PhantomData; + +use crate::{ + hub, + id::{TypedId, Valid}, + track::ResourceMetadata, + RefCount, +}; + +/// Stores all the resources that a bind group stores. 
+pub(crate) struct StatelessBindGroupSate<T, Id: TypedId> { + resources: Vec<(Valid<Id>, RefCount)>, + + _phantom: PhantomData<T>, +} + +impl<T: hub::Resource, Id: TypedId> StatelessBindGroupSate<T, Id> { + pub fn new() -> Self { + Self { + resources: Vec::new(), + + _phantom: PhantomData, + } + } + + /// Optimize the stateless bind group state by sorting it by ID. + /// + /// When this list of states is merged into a tracker, the memory + /// accesses will be in a constant ascending order. + pub(crate) fn optimize(&mut self) { + self.resources + .sort_unstable_by_key(|&(id, _)| id.0.unzip().0); + } + + /// Returns a list of all resources tracked. May contain duplicates. + pub fn used(&self) -> impl Iterator<Item = Valid<Id>> + '_ { + self.resources.iter().map(|&(id, _)| id) + } + + /// Adds the given resource, returning `None` if `id` is not found in `storage`. + pub fn add_single<'a>(&mut self, storage: &'a hub::Storage<T, Id>, id: Id) -> Option<&'a T> { + let resource = storage.get(id).ok()?; + + self.resources + .push((Valid(id), resource.life_guard().add_ref())); + + Some(resource) + } +} + +/// Stores all resource state within a command buffer or device. +pub(crate) struct StatelessTracker<A: hub::HalApi, T, Id: TypedId> { + metadata: ResourceMetadata<A>, + + _phantom: PhantomData<(T, Id)>, +} + +impl<A: hub::HalApi, T: hub::Resource, Id: TypedId> StatelessTracker<A, T, Id> { + pub fn new() -> Self { + Self { + metadata: ResourceMetadata::new(), + + _phantom: PhantomData, + } + } + + fn tracker_assert_in_bounds(&self, index: usize) { + self.metadata.tracker_assert_in_bounds(index); + } + + /// Sets the size of all the vectors inside the tracker. + /// + /// Must be called with the highest possible Resource ID of this type + /// before all unsafe functions are called. + pub fn set_size(&mut self, size: usize) { + self.metadata.set_size(size); + } + + /// Extend the vectors to let the given index be valid. 
+ fn allow_index(&mut self, index: usize) { + if index >= self.metadata.size() { + self.set_size(index + 1); + } + } + + /// Returns a list of all resources tracked. + pub fn used(&self) -> impl Iterator<Item = Valid<Id>> + '_ { + self.metadata.owned_ids() + } + + /// Inserts a single resource into the resource tracker. + /// + /// If the resource already exists in the tracker, it will be overwritten. + /// + /// If the ID is higher than the length of internal vectors, + /// the vectors will be extended. A call to set_size is not needed. + pub fn insert_single(&mut self, id: Valid<Id>, ref_count: RefCount) { + let (index32, epoch, _) = id.0.unzip(); + let index = index32 as usize; + + self.allow_index(index); + + self.tracker_assert_in_bounds(index); + + unsafe { + self.metadata.insert(index, epoch, ref_count); + } + } + + /// Adds the given resource to the tracker. + /// + /// If the ID is higher than the length of internal vectors, + /// the vectors will be extended. A call to set_size is not needed. + pub fn add_single<'a>(&mut self, storage: &'a hub::Storage<T, Id>, id: Id) -> Option<&'a T> { + let item = storage.get(id).ok()?; + + let (index32, epoch, _) = id.unzip(); + let index = index32 as usize; + + self.allow_index(index); + + self.tracker_assert_in_bounds(index); + + unsafe { + self.metadata + .insert(index, epoch, item.life_guard().add_ref()); + } + + Some(item) + } + + /// Adds the given resources from the given tracker. + /// + /// If the ID is higher than the length of internal vectors, + /// the vectors will be extended. A call to set_size is not needed. 
+ pub fn add_from_tracker(&mut self, other: &Self) { + let incoming_size = other.metadata.size(); + if incoming_size > self.metadata.size() { + self.set_size(incoming_size); + } + + for index in other.metadata.owned_indices() { + self.tracker_assert_in_bounds(index); + other.tracker_assert_in_bounds(index); + unsafe { + let previously_owned = self.metadata.contains_unchecked(index); + + if !previously_owned { + let epoch = other.metadata.get_epoch_unchecked(index); + let other_ref_count = other.metadata.get_ref_count_unchecked(index); + self.metadata.insert(index, epoch, other_ref_count.clone()); + } + } + } + } + + /// Removes the given resource from the tracker iff we have the last reference to the + /// resource and the epoch matches. + /// + /// Returns true if the resource was removed. + /// + /// If the ID's index is not below the length of internal vectors, + /// false will be returned. + pub fn remove_abandoned(&mut self, id: Valid<Id>) -> bool { + let (index32, epoch, _) = id.0.unzip(); + let index = index32 as usize; + + if index >= self.metadata.size() { + return false; + } + + self.tracker_assert_in_bounds(index); + + unsafe { + if self.metadata.contains_unchecked(index) { + let existing_epoch = self.metadata.get_epoch_unchecked(index); + let existing_ref_count = self.metadata.get_ref_count_unchecked(index); + + if existing_epoch == epoch && existing_ref_count.load() == 1 { + self.metadata.remove(index); + return true; + } + } + } + + false + } +} diff --git a/third_party/rust/wgpu-core/src/track/texture.rs b/third_party/rust/wgpu-core/src/track/texture.rs new file mode 100644 index 0000000000..6db2bab725 --- /dev/null +++ b/third_party/rust/wgpu-core/src/track/texture.rs @@ -0,0 +1,1551 @@ +/*! Texture Trackers + * + * Texture trackers are significantly more complicated than + * the buffer trackers because textures can be in a "complex" + * state where each individual subresource can potentially be + * in a different state from every other subresource. 
These + * complex states are stored separately from the simple states + * because they are significantly more difficult to track and + * most resources spend the vast majority of their lives in + * simple states. + * + * There are two special texture usages: `UNKNOWN` and `UNINITIALIZED`. + * - `UNKNOWN` is only used in complex states and is used to signify + * that the complex state does not know anything about those subresources. + * It cannot leak into transitions; it is invalid to transition into UNKNOWN + * state. + * - `UNINITIALIZED` is used in both simple and complex states to mean the texture + * is known to be in some undefined state. Any transition away from UNINITIALIZED + * will treat the contents as junk. +!*/ + +use super::{range::RangedStates, PendingTransition}; +use crate::{ + hub, + id::{TextureId, TypedId, Valid}, + resource::Texture, + track::{ + invalid_resource_state, skip_barrier, ResourceMetadata, ResourceMetadataProvider, + ResourceUses, UsageConflict, + }, + LifeGuard, RefCount, +}; +use hal::TextureUses; + +use arrayvec::ArrayVec; +use naga::FastHashMap; +use wgt::{strict_assert, strict_assert_eq}; + +use std::{borrow::Cow, iter, marker::PhantomData, ops::Range, vec::Drain}; + +/// Specifies a particular set of subresources in a texture. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct TextureSelector { + pub mips: Range<u32>, + pub layers: Range<u32>, +} + +impl ResourceUses for TextureUses { + const EXCLUSIVE: Self = Self::EXCLUSIVE; + + type Id = TextureId; + type Selector = TextureSelector; + + fn bits(self) -> u16 { + Self::bits(&self) + } + + fn all_ordered(self) -> bool { + Self::ORDERED.contains(self) + } + + fn any_exclusive(self) -> bool { + self.intersects(Self::EXCLUSIVE) + } +} + +/// Represents the complex state of textures where every subresource is potentially +/// in a different state. 
+#[derive(Clone, Debug, Default, PartialEq)] +struct ComplexTextureState { + mips: ArrayVec<RangedStates<u32, TextureUses>, { hal::MAX_MIP_LEVELS as usize }>, +} + +impl ComplexTextureState { + /// Creates complex texture state for the given sizes. + /// + /// This state will be initialized with the UNKNOWN state, a special state + /// which means the tracker knows nothing about the state. + fn new(mip_level_count: u32, array_layer_count: u32) -> Self { + Self { + mips: iter::repeat_with(|| { + RangedStates::from_range(0..array_layer_count, TextureUses::UNKNOWN) + }) + .take(mip_level_count as usize) + .collect(), + } + } + + /// Initialize a complex state from a selector representing the full size of the texture + /// and an iterator of a selector and a texture use, specifying a usage for a specific + /// set of subresources. + /// + /// [`Self::to_selector_state_iter`] can be used to create such an iterator. + /// + /// # Safety + /// + /// All selectors in the iterator must be inside of the full_range selector. + /// + /// The full range selector must have mips and layers start at 0. + unsafe fn from_selector_state_iter( + full_range: TextureSelector, + state_iter: impl Iterator<Item = (TextureSelector, TextureUses)>, + ) -> Self { + strict_assert_eq!(full_range.layers.start, 0); + strict_assert_eq!(full_range.mips.start, 0); + + let mut complex = + ComplexTextureState::new(full_range.mips.len() as u32, full_range.layers.len() as u32); + for (selector, desired_state) in state_iter { + strict_assert!(selector.layers.end <= full_range.layers.end); + strict_assert!(selector.mips.end <= full_range.mips.end); + + // This should only ever happen with a wgpu bug, but let's just double + // check that resource states don't have any conflicts. 
+ strict_assert_eq!(invalid_resource_state(desired_state), false); + + let mips = selector.mips.start as usize..selector.mips.end as usize; + for mip in unsafe { complex.mips.get_unchecked_mut(mips) } { + for &mut (_, ref mut state) in mip.isolate(&selector.layers, TextureUses::UNKNOWN) { + *state = desired_state; + } + } + } + complex + } + + /// Convert a complex state into an iterator over all states stored. + /// + /// [`Self::from_selector_state_iter`] can be used to consume such an iterator. + fn to_selector_state_iter( + &self, + ) -> impl Iterator<Item = (TextureSelector, TextureUses)> + Clone + '_ { + self.mips.iter().enumerate().flat_map(|(mip, inner)| { + let mip = mip as u32; + { + inner.iter().map(move |&(ref layers, inner)| { + ( + TextureSelector { + mips: mip..mip + 1, + layers: layers.clone(), + }, + inner, + ) + }) + } + }) + } +} + +/// Stores all the textures that a bind group stores. +pub(crate) struct TextureBindGroupState<A: hub::HalApi> { + textures: Vec<( + Valid<TextureId>, + Option<TextureSelector>, + RefCount, + TextureUses, + )>, + + _phantom: PhantomData<A>, +} +impl<A: hub::HalApi> TextureBindGroupState<A> { + pub fn new() -> Self { + Self { + textures: Vec::new(), + + _phantom: PhantomData, + } + } + + /// Optimize the texture bind group state by sorting it by ID. + /// + /// When this list of states is merged into a tracker, the memory + /// accesses will be in a constant ascending order. + pub(crate) fn optimize(&mut self) { + self.textures + .sort_unstable_by_key(|&(id, _, _, _)| id.0.unzip().0); + } + + /// Returns a list of all textures tracked. May contain duplicates. + pub fn used(&self) -> impl Iterator<Item = Valid<TextureId>> + '_ { + self.textures.iter().map(|&(id, _, _, _)| id) + } + + /// Adds the given resource with the given state. 
    ///
    /// Returns `None` (and records nothing) if `id` cannot be looked up in `storage`;
    /// otherwise returns the texture that was looked up.
    pub fn add_single<'a>(
        &mut self,
        storage: &'a hub::Storage<Texture<A>, TextureId>,
        id: TextureId,
        ref_count: RefCount,
        selector: Option<TextureSelector>,
        state: TextureUses,
    ) -> Option<&'a Texture<A>> {
        let value = storage.get(id).ok()?;

        self.textures.push((Valid(id), selector, ref_count, state));

        Some(value)
    }
}

/// Container for corresponding simple and complex texture states.
#[derive(Debug)]
pub(crate) struct TextureStateSet {
    /// State per texture index. An entry equal to `TextureUses::COMPLEX` means the
    /// real state is stored in `complex` under the same index.
    simple: Vec<TextureUses>,
    /// Per-subresource state for textures whose `simple` entry is `COMPLEX`,
    /// keyed by texture index.
    complex: FastHashMap<u32, ComplexTextureState>,
}
impl TextureStateSet {
    /// Creates an empty state set.
    fn new() -> Self {
        Self {
            simple: Vec::new(),
            complex: FastHashMap::default(),
        }
    }

    /// Resizes the simple state vector; new entries start as `UNINITIALIZED`.
    fn set_size(&mut self, size: usize) {
        self.simple.resize(size, TextureUses::UNINITIALIZED);
    }
}

/// Stores all texture state within a single usage scope.
#[derive(Debug)]
pub(crate) struct TextureUsageScope<A: hub::HalApi> {
    set: TextureStateSet,

    metadata: ResourceMetadata<A>,
}

impl<A: hub::HalApi> TextureUsageScope<A> {
    /// Creates an empty usage scope.
    pub fn new() -> Self {
        Self {
            set: TextureStateSet::new(),

            metadata: ResourceMetadata::new(),
        }
    }

    /// Strict-assertion check that `index` is valid for every internal vector and
    /// that a tracked texture marked `COMPLEX` has an entry in the complex map.
    fn tracker_assert_in_bounds(&self, index: usize) {
        self.metadata.tracker_assert_in_bounds(index);

        strict_assert!(index < self.set.simple.len());

        strict_assert!(if self.metadata.contains(index)
            && self.set.simple[index] == TextureUses::COMPLEX
        {
            self.set.complex.contains_key(&(index as u32))
        } else {
            true
        });
    }

    /// Sets the size of all the vectors inside the tracker.
    ///
    /// Must be called with the highest possible Texture ID before
    /// all unsafe functions are called.
    pub fn set_size(&mut self, size: usize) {
        self.set.set_size(size);
        self.metadata.set_size(size);
    }

    /// Returns a list of all textures tracked.
    pub fn used(&self) -> impl Iterator<Item = Valid<TextureId>> + '_ {
        self.metadata.owned_ids()
    }

    /// Returns true if the tracker owns no resources.
    ///
    /// This is a O(n) operation.
    pub(crate) fn is_empty(&self) -> bool {
        self.metadata.is_empty()
    }

    /// Merge the list of texture states in the given usage scope into this UsageScope.
    ///
    /// If any of the resulting states is invalid, stops the merge and returns a usage
    /// conflict with the details of the invalid state.
    ///
    /// If the given tracker uses IDs higher than the length of internal vectors,
    /// the vectors will be extended. A call to set_size is not needed.
    pub fn merge_usage_scope(
        &mut self,
        storage: &hub::Storage<Texture<A>, TextureId>,
        scope: &Self,
    ) -> Result<(), UsageConflict> {
        // Grow our vectors so every index owned by `scope` is also valid here.
        let incoming_size = scope.set.simple.len();
        if incoming_size > self.set.simple.len() {
            self.set_size(incoming_size);
        }

        for index in scope.metadata.owned_indices() {
            let index32 = index as u32;

            self.tracker_assert_in_bounds(index);
            scope.tracker_assert_in_bounds(index);

            let texture_data = unsafe { texture_data_from_texture(storage, index32) };
            unsafe {
                insert_or_merge(
                    texture_data,
                    &mut self.set,
                    &mut self.metadata,
                    index32,
                    index,
                    TextureStateProvider::TextureSet { set: &scope.set },
                    ResourceMetadataProvider::Indirect {
                        metadata: &scope.metadata,
                    },
                )?
            };
        }

        Ok(())
    }

    /// Merge the list of texture states in the given bind group into this usage scope.
    ///
    /// If any of the resulting states is invalid, stops the merge and returns a usage
    /// conflict with the details of the invalid state.
    ///
    /// Because bind groups do not check if the union of all their states is valid,
    /// this method is allowed to return Err on the first bind group bound.
    ///
    /// # Safety
    ///
    /// [`Self::set_size`] must be called with the maximum possible Texture ID before this
    /// method is called.
    pub unsafe fn merge_bind_group(
        &mut self,
        storage: &hub::Storage<Texture<A>, TextureId>,
        bind_group: &TextureBindGroupState<A>,
    ) -> Result<(), UsageConflict> {
        // Merge every recorded texture one by one; the first conflict aborts the merge.
        for &(id, ref selector, ref ref_count, state) in &bind_group.textures {
            unsafe { self.merge_single(storage, id, selector.clone(), ref_count, state)? };
        }

        Ok(())
    }

    /// Merge a single state into the UsageScope.
    ///
    /// If the resulting state is invalid, returns a usage
    /// conflict with the details of the invalid state.
    ///
    /// # Safety
    ///
    /// Unlike other trackers whose merge_single is safe, this method is only
    /// called where there is already other unsafe tracking functions active,
    /// so we can prove this unsafe "for free".
    ///
    /// [`Self::set_size`] must be called with the maximum possible Texture ID before this
    /// method is called.
    pub unsafe fn merge_single(
        &mut self,
        storage: &hub::Storage<Texture<A>, TextureId>,
        id: Valid<TextureId>,
        selector: Option<TextureSelector>,
        ref_count: &RefCount,
        new_state: TextureUses,
    ) -> Result<(), UsageConflict> {
        let (index32, epoch, _) = id.0.unzip();
        let index = index32 as usize;

        self.tracker_assert_in_bounds(index);

        let texture_data = unsafe { texture_data_from_texture(storage, index32) };
        unsafe {
            insert_or_merge(
                texture_data,
                &mut self.set,
                &mut self.metadata,
                index32,
                index,
                // `None` selector means a single state for the whole texture.
                TextureStateProvider::from_option(selector, new_state),
                ResourceMetadataProvider::Direct {
                    epoch,
                    ref_count: Cow::Borrowed(ref_count),
                },
            )?
        };

        Ok(())
    }
}

/// Stores all texture state within a command buffer or device.
pub(crate) struct TextureTracker<A: hub::HalApi> {
    /// Passed to the insert/update helpers as the "start" state set.
    start_set: TextureStateSet,
    /// Passed to the insert/barrier/update helpers as the "end"/current state set.
    end_set: TextureStateSet,

    metadata: ResourceMetadata<A>,

    /// Pending transitions produced by the `set_*` methods until they are drained.
    temp: Vec<PendingTransition<TextureUses>>,

    _phantom: PhantomData<A>,
}
impl<A: hub::HalApi> TextureTracker<A> {
    /// Creates an empty tracker.
    pub fn new() -> Self {
        Self {
            start_set: TextureStateSet::new(),
            end_set: TextureStateSet::new(),

            metadata: ResourceMetadata::new(),

            temp: Vec::new(),

            _phantom: PhantomData,
        }
    }

    /// Strict-assertion check that `index` is valid for every internal vector and
    /// that a tracked texture marked `COMPLEX` in either set has an entry in that
    /// set's complex map.
    fn tracker_assert_in_bounds(&self, index: usize) {
        self.metadata.tracker_assert_in_bounds(index);

        strict_assert!(index < self.start_set.simple.len());
        strict_assert!(index < self.end_set.simple.len());

        strict_assert!(if self.metadata.contains(index)
            && self.start_set.simple[index] == TextureUses::COMPLEX
        {
            self.start_set.complex.contains_key(&(index as u32))
        } else {
            true
        });
        strict_assert!(if self.metadata.contains(index)
            && self.end_set.simple[index] == TextureUses::COMPLEX
        {
            self.end_set.complex.contains_key(&(index as u32))
        } else {
            true
        });
    }

    /// Sets the size of all the vectors inside the tracker.
    ///
    /// Must be called with the highest possible Texture ID before
    /// all unsafe functions are called.
    pub fn set_size(&mut self, size: usize) {
        self.start_set.set_size(size);
        self.end_set.set_size(size);

        self.metadata.set_size(size);
    }

    /// Extend the vectors to let the given index be valid.
    fn allow_index(&mut self, index: usize) {
        if index >= self.start_set.simple.len() {
            self.set_size(index + 1);
        }
    }

    /// Returns a list of all textures tracked.
    pub fn used(&self) -> impl Iterator<Item = Valid<TextureId>> + '_ {
        self.metadata.owned_ids()
    }

    /// Drains all currently pending transitions.
    pub fn drain(&mut self) -> Drain<PendingTransition<TextureUses>> {
        self.temp.drain(..)
    }

    /// Get the refcount of the given resource.
    ///
    /// # Safety
    ///
    /// [`Self::set_size`] must be called with the maximum possible Texture ID before this
    /// method is called.
    ///
    /// The resource must be tracked by this tracker.
    pub unsafe fn get_ref_count(&self, id: Valid<TextureId>) -> &RefCount {
        let (index32, _, _) = id.0.unzip();
        let index = index32 as usize;

        self.tracker_assert_in_bounds(index);

        unsafe { self.metadata.get_ref_count_unchecked(index) }
    }

    /// Inserts a single texture and a state into the resource tracker.
    ///
    /// If the resource already exists in the tracker, this will panic.
    ///
    /// If the ID is higher than the length of internal vectors,
    /// the vectors will be extended. A call to set_size is not needed.
    pub fn insert_single(&mut self, id: TextureId, ref_count: RefCount, usage: TextureUses) {
        let (index32, epoch, _) = id.unzip();
        let index = index32 as usize;

        // Grow the vectors first so the unchecked accesses below stay in bounds.
        self.allow_index(index);

        self.tracker_assert_in_bounds(index);

        unsafe {
            let currently_owned = self.metadata.contains_unchecked(index);

            if currently_owned {
                panic!("Tried to insert texture already tracked");
            }

            // No texture data is passed: a `KnownSingle` provider yields a single
            // state directly and does not consult the texture data.
            insert(
                None,
                Some(&mut self.start_set),
                &mut self.end_set,
                &mut self.metadata,
                index32,
                index,
                TextureStateProvider::KnownSingle { state: usage },
                None,
                ResourceMetadataProvider::Direct {
                    epoch,
                    ref_count: Cow::Owned(ref_count),
                },
            )
        };
    }

    /// Sets the state of a single texture.
    ///
    /// If a transition is needed to get the texture into the given state, that transition
    /// is returned.
    ///
    /// If the ID is higher than the length of internal vectors,
    /// the vectors will be extended. A call to set_size is not needed.
    pub fn set_single(
        &mut self,
        texture: &Texture<A>,
        id: TextureId,
        selector: TextureSelector,
        new_state: TextureUses,
    ) -> Option<Drain<'_, PendingTransition<TextureUses>>> {
        let (index32, epoch, _) = id.unzip();
        let index = index32 as usize;

        // Grow the vectors first so the unchecked accesses below stay in bounds.
        self.allow_index(index);

        self.tracker_assert_in_bounds(index);

        unsafe {
            insert_or_barrier_update(
                (&texture.life_guard, &texture.full_range),
                Some(&mut self.start_set),
                &mut self.end_set,
                &mut self.metadata,
                index32,
                index,
                TextureStateProvider::Selector {
                    selector,
                    state: new_state,
                },
                None,
                ResourceMetadataProvider::Resource { epoch },
                &mut self.temp,
            )
        }

        // This always returns `Some`; the drain is empty when no transition was recorded.
        // NOTE(review): it drains everything in `self.temp`, including transitions left
        // there by earlier calls that were not drained.
        Some(self.temp.drain(..))
    }

    /// Sets the given state for all textures in the given tracker.
    ///
    /// If a transition is needed to get the texture into the needed state,
    /// those transitions are stored within the tracker. A subsequent
    /// call to [`Self::drain`] is needed to get those transitions.
    ///
    /// If the ID is higher than the length of internal vectors,
    /// the vectors will be extended. A call to set_size is not needed.
    pub fn set_from_tracker(
        &mut self,
        storage: &hub::Storage<Texture<A>, TextureId>,
        tracker: &Self,
    ) {
        // Grow our vectors so every index owned by `tracker` is also valid here.
        let incoming_size = tracker.start_set.simple.len();
        if incoming_size > self.start_set.simple.len() {
            self.set_size(incoming_size);
        }

        for index in tracker.metadata.owned_indices() {
            let index32 = index as u32;

            self.tracker_assert_in_bounds(index);
            tracker.tracker_assert_in_bounds(index);
            unsafe {
                // The incoming tracker's start set is the state to transition into;
                // its end set is the state recorded afterwards.
                insert_or_barrier_update(
                    texture_data_from_texture(storage, index32),
                    Some(&mut self.start_set),
                    &mut self.end_set,
                    &mut self.metadata,
                    index32,
                    index,
                    TextureStateProvider::TextureSet {
                        set: &tracker.start_set,
                    },
                    Some(TextureStateProvider::TextureSet {
                        set: &tracker.end_set,
                    }),
                    ResourceMetadataProvider::Indirect {
                        metadata: &tracker.metadata,
                    },
                    &mut self.temp,
                );
            }
        }
    }

    /// Sets the given state for all textures in the given UsageScope.
    ///
    /// If a transition is needed to get the textures into the needed state,
    /// those transitions are stored within the tracker. A subsequent
    /// call to [`Self::drain`] is needed to get those transitions.
    ///
    /// If the ID is higher than the length of internal vectors,
    /// the vectors will be extended. A call to set_size is not needed.
    pub fn set_from_usage_scope(
        &mut self,
        storage: &hub::Storage<Texture<A>, TextureId>,
        scope: &TextureUsageScope<A>,
    ) {
        // Grow our vectors so every index owned by `scope` is also valid here.
        let incoming_size = scope.set.simple.len();
        if incoming_size > self.start_set.simple.len() {
            self.set_size(incoming_size);
        }

        for index in scope.metadata.owned_indices() {
            let index32 = index as u32;

            self.tracker_assert_in_bounds(index);
            scope.tracker_assert_in_bounds(index);
            unsafe {
                // A usage scope has a single state set, so no separate end-state
                // provider is passed.
                insert_or_barrier_update(
                    texture_data_from_texture(storage, index32),
                    Some(&mut self.start_set),
                    &mut self.end_set,
                    &mut self.metadata,
                    index32,
                    index,
                    TextureStateProvider::TextureSet { set: &scope.set },
                    None,
                    ResourceMetadataProvider::Indirect {
                        metadata: &scope.metadata,
                    },
                    &mut self.temp,
                );
            }
        }
    }

    /// Iterates through all textures in the given bind group and adopts
    /// the state given for those textures in the UsageScope. It also
    /// removes all touched textures from the usage scope.
    ///
    /// If a transition is needed to get the textures into the needed state,
    /// those transitions are stored within the tracker. A subsequent
    /// call to [`Self::drain`] is needed to get those transitions.
    ///
    /// This is a really funky method used by Compute Passes to generate
    /// barriers after a call to dispatch without needing to iterate
    /// over all elements in the usage scope. We use each
    /// bind group as a source of which IDs to look at. The bind groups
    /// must have first been added to the usage scope.
    ///
    /// # Safety
    ///
    /// [`Self::set_size`] must be called with the maximum possible Texture ID before this
    /// method is called.
+ pub unsafe fn set_and_remove_from_usage_scope_sparse( + &mut self, + storage: &hub::Storage<Texture<A>, TextureId>, + scope: &mut TextureUsageScope<A>, + bind_group_state: &TextureBindGroupState<A>, + ) { + let incoming_size = scope.set.simple.len(); + if incoming_size > self.start_set.simple.len() { + self.set_size(incoming_size); + } + + for &(id, _, _, _) in bind_group_state.textures.iter() { + let (index32, _, _) = id.0.unzip(); + let index = index32 as usize; + scope.tracker_assert_in_bounds(index); + + if unsafe { !scope.metadata.contains_unchecked(index) } { + continue; + } + let texture_data = unsafe { texture_data_from_texture(storage, index32) }; + unsafe { + insert_or_barrier_update( + texture_data, + Some(&mut self.start_set), + &mut self.end_set, + &mut self.metadata, + index32, + index, + TextureStateProvider::TextureSet { set: &scope.set }, + None, + ResourceMetadataProvider::Indirect { + metadata: &scope.metadata, + }, + &mut self.temp, + ) + }; + + unsafe { scope.metadata.remove(index) }; + } + } + + /// Unconditionally removes the given resource from the tracker. + /// + /// Returns true if the resource was removed. + /// + /// If the ID is higher than the length of internal vectors, + /// false will be returned. + pub fn remove(&mut self, id: Valid<TextureId>) -> bool { + let (index32, epoch, _) = id.0.unzip(); + let index = index32 as usize; + + if index > self.metadata.size() { + return false; + } + + self.tracker_assert_in_bounds(index); + + unsafe { + if self.metadata.contains_unchecked(index) { + let existing_epoch = self.metadata.get_epoch_unchecked(index); + assert_eq!(existing_epoch, epoch); + + self.start_set.complex.remove(&index32); + self.end_set.complex.remove(&index32); + + self.metadata.remove(index); + + return true; + } + } + + false + } + + /// Removes the given resource from the tracker iff we have the last reference to the + /// resource and the epoch matches. + /// + /// Returns true if the resource was removed. 
+ /// + /// If the ID is higher than the length of internal vectors, + /// false will be returned. + pub fn remove_abandoned(&mut self, id: Valid<TextureId>) -> bool { + let (index32, epoch, _) = id.0.unzip(); + let index = index32 as usize; + + if index > self.metadata.size() { + return false; + } + + self.tracker_assert_in_bounds(index); + + unsafe { + if self.metadata.contains_unchecked(index) { + let existing_epoch = self.metadata.get_epoch_unchecked(index); + let existing_ref_count = self.metadata.get_ref_count_unchecked(index); + + if existing_epoch == epoch && existing_ref_count.load() == 1 { + self.start_set.complex.remove(&index32); + self.end_set.complex.remove(&index32); + + self.metadata.remove(index); + + return true; + } + } + } + + false + } +} + +/// An iterator adapter that can store two different iterator types. +#[derive(Clone)] +enum EitherIter<L, R> { + Left(L), + Right(R), +} + +impl<L, R, D> Iterator for EitherIter<L, R> +where + L: Iterator<Item = D>, + R: Iterator<Item = D>, +{ + type Item = D; + + fn next(&mut self) -> Option<Self::Item> { + match *self { + EitherIter::Left(ref mut inner) => inner.next(), + EitherIter::Right(ref mut inner) => inner.next(), + } + } +} + +/// Container that signifies storing both different things +/// if there is a single state or many different states +/// involved in the operation. +#[derive(Debug, Clone)] +enum SingleOrManyStates<S, M> { + Single(S), + Many(M), +} + +/// A source of texture state. +#[derive(Clone)] +enum TextureStateProvider<'a> { + /// Comes directly from a single state. + KnownSingle { state: TextureUses }, + /// Comes from a selector and a single state. + Selector { + selector: TextureSelector, + state: TextureUses, + }, + /// Comes from another texture set. + TextureSet { set: &'a TextureStateSet }, +} +impl<'a> TextureStateProvider<'a> { + /// Convenience function turning `Option<Selector>` into this enum. 
    ///
    /// `Some(selector)` becomes [`Self::Selector`]; `None` becomes [`Self::KnownSingle`].
    fn from_option(selector: Option<TextureSelector>, state: TextureUses) -> Self {
        match selector {
            Some(selector) => Self::Selector { selector, state },
            None => Self::KnownSingle { state },
        }
    }

    /// Get the state provided by this.
    ///
    /// # Panics
    ///
    /// Panics if texture_data is None and this uses a Selector source.
    ///
    /// # Safety
    ///
    /// - The index must be in bounds of the state set if this uses a TextureSet source.
    /// - If the TextureSet source stores `COMPLEX` at that index, its complex map
    ///   must contain an entry for `index32`.
    #[inline(always)]
    unsafe fn get_state(
        self,
        texture_data: Option<(&LifeGuard, &TextureSelector)>,
        index32: u32,
        index: usize,
    ) -> SingleOrManyStates<
        TextureUses,
        impl Iterator<Item = (TextureSelector, TextureUses)> + Clone + 'a,
    > {
        match self {
            TextureStateProvider::KnownSingle { state } => SingleOrManyStates::Single(state),
            TextureStateProvider::Selector { selector, state } => {
                // We check if the selector given is actually for the full resource,
                // and if it is we promote to a simple state. This allows upstream
                // code to specify selectors willy nilly, and all that are really
                // single states are promoted here.
                if *texture_data.unwrap().1 == selector {
                    SingleOrManyStates::Single(state)
                } else {
                    SingleOrManyStates::Many(EitherIter::Left(iter::once((selector, state))))
                }
            }
            TextureStateProvider::TextureSet { set } => {
                // Unchecked read: the caller guarantees `index` is in bounds (see # Safety).
                let new_state = *unsafe { set.simple.get_unchecked(index) };

                if new_state == TextureUses::COMPLEX {
                    // Unchecked unwrap: a `COMPLEX` marker implies a complex map entry.
                    let new_complex = unsafe { set.complex.get(&index32).unwrap_unchecked() };

                    SingleOrManyStates::Many(EitherIter::Right(
                        new_complex.to_selector_state_iter(),
                    ))
                } else {
                    SingleOrManyStates::Single(new_state)
                }
            }
        }
    }
}

/// Helper function that gets the life guard and the full-range selector
/// of a texture out of the texture storage.
#[inline(always)]
unsafe fn texture_data_from_texture<A: hub::HalApi>(
    storage: &hub::Storage<Texture<A>, TextureId>,
    index32: u32,
) -> (&LifeGuard, &TextureSelector) {
    // Unchecked lookup: presumably `index32` must refer to a live texture in
    // `storage` — confirm against `Storage::get_unchecked`.
    let texture = unsafe { storage.get_unchecked(index32) };
    (&texture.life_guard, &texture.full_range)
}

/// Does an insertion operation if the index isn't tracked
/// in the current metadata, otherwise merges the given state
/// with the current state. If the merging would cause
/// a conflict, returns that usage conflict.
///
/// # Safety
///
/// Indexes must be valid indexes into all arrays passed in
/// to this function, either directly or via metadata or provider structs.
#[inline(always)]
unsafe fn insert_or_merge<A: hub::HalApi>(
    texture_data: (&LifeGuard, &TextureSelector),
    current_state_set: &mut TextureStateSet,
    resource_metadata: &mut ResourceMetadata<A>,
    index32: u32,
    index: usize,
    state_provider: TextureStateProvider<'_>,
    metadata_provider: ResourceMetadataProvider<'_, A>,
) -> Result<(), UsageConflict> {
    let currently_owned = unsafe { resource_metadata.contains_unchecked(index) };

    if !currently_owned {
        // Not tracked yet: record the provided state as-is. There is no separate
        // start set and no end-state provider on this path.
        unsafe {
            insert(
                Some(texture_data),
                None,
                current_state_set,
                resource_metadata,
                index32,
                index,
                state_provider,
                None,
                metadata_provider,
            )
        };
        return Ok(());
    }

    // Already tracked: combine the provided state with the current one.
    unsafe {
        merge(
            texture_data,
            current_state_set,
            index32,
            index,
            state_provider,
            metadata_provider,
        )
    }
}

/// If the resource isn't tracked
/// - Inserts the given resource.
/// - Uses the `start_state_provider` to populate `start_states`
/// - Uses either `end_state_provider` or `start_state_provider`
///   to populate `current_states`.
/// If the resource is tracked
/// - Inserts barriers from the state in `current_states`
///   to the state provided by `start_state_provider`.
/// - Updates the `current_states` with either the state from
///   `end_state_provider` or `start_state_provider`.
+/// +/// Any barriers are added to the barrier vector. +/// +/// # Safety +/// +/// Indexes must be valid indexes into all arrays passed in +/// to this function, either directly or via metadata or provider structs. +#[inline(always)] +unsafe fn insert_or_barrier_update<A: hub::HalApi>( + texture_data: (&LifeGuard, &TextureSelector), + start_state: Option<&mut TextureStateSet>, + current_state_set: &mut TextureStateSet, + resource_metadata: &mut ResourceMetadata<A>, + index32: u32, + index: usize, + start_state_provider: TextureStateProvider<'_>, + end_state_provider: Option<TextureStateProvider<'_>>, + metadata_provider: ResourceMetadataProvider<'_, A>, + barriers: &mut Vec<PendingTransition<TextureUses>>, +) { + let currently_owned = unsafe { resource_metadata.contains_unchecked(index) }; + + if !currently_owned { + unsafe { + insert( + Some(texture_data), + start_state, + current_state_set, + resource_metadata, + index32, + index, + start_state_provider, + end_state_provider, + metadata_provider, + ) + }; + return; + } + + let update_state_provider = end_state_provider.unwrap_or_else(|| start_state_provider.clone()); + unsafe { + barrier( + texture_data, + current_state_set, + index32, + index, + start_state_provider, + barriers, + ) + }; + + let start_state_set = start_state.unwrap(); + unsafe { + update( + texture_data, + start_state_set, + current_state_set, + index32, + index, + update_state_provider, + ) + }; +} + +#[inline(always)] +unsafe fn insert<A: hub::HalApi>( + texture_data: Option<(&LifeGuard, &TextureSelector)>, + start_state: Option<&mut TextureStateSet>, + end_state: &mut TextureStateSet, + resource_metadata: &mut ResourceMetadata<A>, + index32: u32, + index: usize, + start_state_provider: TextureStateProvider<'_>, + end_state_provider: Option<TextureStateProvider<'_>>, + metadata_provider: ResourceMetadataProvider<'_, A>, +) { + let start_layers = unsafe { start_state_provider.get_state(texture_data, index32, index) }; + match start_layers { + 
SingleOrManyStates::Single(state) => { + // This should only ever happen with a wgpu bug, but let's just double + // check that resource states don't have any conflicts. + strict_assert_eq!(invalid_resource_state(state), false); + + log::trace!("\ttex {index32}: insert start {state:?}"); + + if let Some(start_state) = start_state { + unsafe { *start_state.simple.get_unchecked_mut(index) = state }; + } + + // We only need to insert ourselves the end state if there is no end state provider. + if end_state_provider.is_none() { + unsafe { *end_state.simple.get_unchecked_mut(index) = state }; + } + } + SingleOrManyStates::Many(state_iter) => { + let full_range = texture_data.unwrap().1.clone(); + + let complex = + unsafe { ComplexTextureState::from_selector_state_iter(full_range, state_iter) }; + + log::trace!("\ttex {index32}: insert start {complex:?}"); + + if let Some(start_state) = start_state { + unsafe { *start_state.simple.get_unchecked_mut(index) = TextureUses::COMPLEX }; + start_state.complex.insert(index32, complex.clone()); + } + + // We only need to insert ourselves the end state if there is no end state provider. + if end_state_provider.is_none() { + unsafe { *end_state.simple.get_unchecked_mut(index) = TextureUses::COMPLEX }; + end_state.complex.insert(index32, complex); + } + } + } + + if let Some(end_state_provider) = end_state_provider { + match unsafe { end_state_provider.get_state(texture_data, index32, index) } { + SingleOrManyStates::Single(state) => { + // This should only ever happen with a wgpu bug, but let's just double + // check that resource states don't have any conflicts. + strict_assert_eq!(invalid_resource_state(state), false); + + log::trace!("\ttex {index32}: insert end {state:?}"); + + // We only need to insert into the end, as there is guarenteed to be + // a start state provider. 
+ unsafe { *end_state.simple.get_unchecked_mut(index) = state }; + } + SingleOrManyStates::Many(state_iter) => { + let full_range = texture_data.unwrap().1.clone(); + + let complex = unsafe { + ComplexTextureState::from_selector_state_iter(full_range, state_iter) + }; + + log::trace!("\ttex {index32}: insert end {complex:?}"); + + // We only need to insert into the end, as there is guarenteed to be + // a start state provider. + unsafe { *end_state.simple.get_unchecked_mut(index) = TextureUses::COMPLEX }; + end_state.complex.insert(index32, complex); + } + } + } + + unsafe { + let (epoch, ref_count) = + metadata_provider.get_own(texture_data.map(|(life_guard, _)| life_guard), index); + resource_metadata.insert(index, epoch, ref_count); + } +} + +#[inline(always)] +unsafe fn merge<A: hub::HalApi>( + texture_data: (&LifeGuard, &TextureSelector), + current_state_set: &mut TextureStateSet, + index32: u32, + index: usize, + state_provider: TextureStateProvider<'_>, + metadata_provider: ResourceMetadataProvider<'_, A>, +) -> Result<(), UsageConflict> { + let current_simple = unsafe { current_state_set.simple.get_unchecked_mut(index) }; + let current_state = if *current_simple == TextureUses::COMPLEX { + SingleOrManyStates::Many(unsafe { + current_state_set + .complex + .get_mut(&index32) + .unwrap_unchecked() + }) + } else { + SingleOrManyStates::Single(current_simple) + }; + + let new_state = unsafe { state_provider.get_state(Some(texture_data), index32, index) }; + + match (current_state, new_state) { + (SingleOrManyStates::Single(current_simple), SingleOrManyStates::Single(new_simple)) => { + let merged_state = *current_simple | new_simple; + + log::trace!("\ttex {index32}: merge simple {current_simple:?} + {new_simple:?}"); + + if invalid_resource_state(merged_state) { + return Err(UsageConflict::from_texture( + TextureId::zip( + index32, + unsafe { metadata_provider.get_epoch(index) }, + A::VARIANT, + ), + texture_data.1.clone(), + *current_simple, + new_simple, + 
)); + } + + *current_simple = merged_state; + } + (SingleOrManyStates::Single(current_simple), SingleOrManyStates::Many(new_many)) => { + // Because we are now demoting this simple state to a complex state, + // we actually need to make a whole new complex state for us to use + // as there wasn't one before. + let mut new_complex = unsafe { + ComplexTextureState::from_selector_state_iter( + texture_data.1.clone(), + iter::once((texture_data.1.clone(), *current_simple)), + ) + }; + + for (selector, new_state) in new_many { + let merged_state = *current_simple | new_state; + + log::trace!( + "\ttex {index32}: merge {selector:?} {current_simple:?} + {new_state:?}" + ); + + if invalid_resource_state(merged_state) { + return Err(UsageConflict::from_texture( + TextureId::zip( + index32, + unsafe { metadata_provider.get_epoch(index) }, + A::VARIANT, + ), + selector, + *current_simple, + new_state, + )); + } + + for mip in + &mut new_complex.mips[selector.mips.start as usize..selector.mips.end as usize] + { + for &mut (_, ref mut current_layer_state) in + mip.isolate(&selector.layers, TextureUses::UNKNOWN) + { + *current_layer_state = merged_state; + } + + mip.coalesce(); + } + } + + *current_simple = TextureUses::COMPLEX; + current_state_set.complex.insert(index32, new_complex); + } + (SingleOrManyStates::Many(current_complex), SingleOrManyStates::Single(new_simple)) => { + for (mip_id, mip) in current_complex.mips.iter_mut().enumerate() { + let mip_id = mip_id as u32; + + for &mut (ref layers, ref mut current_layer_state) in mip.iter_mut() { + let merged_state = *current_layer_state | new_simple; + + // Once we remove unknown, this will never be empty, as + // simple states are never unknown. 
+ let merged_state = merged_state - TextureUses::UNKNOWN; + + log::trace!( + "\ttex {index32}: merge mip {mip_id} layers {layers:?} \ + {current_layer_state:?} + {new_simple:?}" + ); + + if invalid_resource_state(merged_state) { + return Err(UsageConflict::from_texture( + TextureId::zip( + index32, + unsafe { metadata_provider.get_epoch(index) }, + A::VARIANT, + ), + TextureSelector { + mips: mip_id..mip_id + 1, + layers: layers.clone(), + }, + *current_layer_state, + new_simple, + )); + } + + *current_layer_state = merged_state; + } + + mip.coalesce(); + } + } + (SingleOrManyStates::Many(current_complex), SingleOrManyStates::Many(new_many)) => { + for (selector, new_state) in new_many { + for mip_id in selector.mips { + strict_assert!((mip_id as usize) < current_complex.mips.len()); + + let mip = unsafe { current_complex.mips.get_unchecked_mut(mip_id as usize) }; + + for &mut (ref layers, ref mut current_layer_state) in + mip.isolate(&selector.layers, TextureUses::UNKNOWN) + { + let merged_state = *current_layer_state | new_state; + let merged_state = merged_state - TextureUses::UNKNOWN; + + if merged_state.is_empty() { + // We know nothing about this state, lets just move on. 
+ continue; + } + + log::trace!( + "\ttex {index32}: merge mip {mip_id} layers {layers:?} \ + {current_layer_state:?} + {new_state:?}" + ); + + if invalid_resource_state(merged_state) { + return Err(UsageConflict::from_texture( + TextureId::zip( + index32, + unsafe { metadata_provider.get_epoch(index) }, + A::VARIANT, + ), + TextureSelector { + mips: mip_id..mip_id + 1, + layers: layers.clone(), + }, + *current_layer_state, + new_state, + )); + } + *current_layer_state = merged_state; + } + + mip.coalesce(); + } + } + } + } + Ok(()) +} + +#[inline(always)] +unsafe fn barrier( + texture_data: (&LifeGuard, &TextureSelector), + current_state_set: &TextureStateSet, + index32: u32, + index: usize, + state_provider: TextureStateProvider<'_>, + barriers: &mut Vec<PendingTransition<TextureUses>>, +) { + let current_simple = unsafe { *current_state_set.simple.get_unchecked(index) }; + let current_state = if current_simple == TextureUses::COMPLEX { + SingleOrManyStates::Many(unsafe { + current_state_set.complex.get(&index32).unwrap_unchecked() + }) + } else { + SingleOrManyStates::Single(current_simple) + }; + + let new_state = unsafe { state_provider.get_state(Some(texture_data), index32, index) }; + + match (current_state, new_state) { + (SingleOrManyStates::Single(current_simple), SingleOrManyStates::Single(new_simple)) => { + if skip_barrier(current_simple, new_simple) { + return; + } + + log::trace!("\ttex {index32}: transition simple {current_simple:?} -> {new_simple:?}"); + + barriers.push(PendingTransition { + id: index32, + selector: texture_data.1.clone(), + usage: current_simple..new_simple, + }); + } + (SingleOrManyStates::Single(current_simple), SingleOrManyStates::Many(new_many)) => { + for (selector, new_state) in new_many { + if new_state == TextureUses::UNKNOWN { + continue; + } + + if skip_barrier(current_simple, new_state) { + continue; + } + + log::trace!( + "\ttex {index32}: transition {selector:?} {current_simple:?} -> {new_state:?}" + ); + + 
barriers.push(PendingTransition { + id: index32, + selector, + usage: current_simple..new_state, + }); + } + } + (SingleOrManyStates::Many(current_complex), SingleOrManyStates::Single(new_simple)) => { + for (mip_id, mip) in current_complex.mips.iter().enumerate() { + let mip_id = mip_id as u32; + + for &(ref layers, current_layer_state) in mip.iter() { + if current_layer_state == TextureUses::UNKNOWN { + continue; + } + + if skip_barrier(current_layer_state, new_simple) { + continue; + } + + log::trace!( + "\ttex {index32}: transition mip {mip_id} layers {layers:?} \ + {current_layer_state:?} -> {new_simple:?}" + ); + + barriers.push(PendingTransition { + id: index32, + selector: TextureSelector { + mips: mip_id..mip_id + 1, + layers: layers.clone(), + }, + usage: current_layer_state..new_simple, + }); + } + } + } + (SingleOrManyStates::Many(current_complex), SingleOrManyStates::Many(new_many)) => { + for (selector, new_state) in new_many { + for mip_id in selector.mips { + strict_assert!((mip_id as usize) < current_complex.mips.len()); + + let mip = unsafe { current_complex.mips.get_unchecked(mip_id as usize) }; + + for (layers, current_layer_state) in mip.iter_filter(&selector.layers) { + if *current_layer_state == TextureUses::UNKNOWN + || new_state == TextureUses::UNKNOWN + { + continue; + } + + if skip_barrier(*current_layer_state, new_state) { + continue; + } + + log::trace!( + "\ttex {index32}: transition mip {mip_id} layers {layers:?} \ + {current_layer_state:?} -> {new_state:?}" + ); + + barriers.push(PendingTransition { + id: index32, + selector: TextureSelector { + mips: mip_id..mip_id + 1, + layers, + }, + usage: *current_layer_state..new_state, + }); + } + } + } + } + } +} + +#[allow(clippy::needless_option_as_deref)] // we use this for reborrowing Option<&mut T> +#[inline(always)] +unsafe fn update( + texture_data: (&LifeGuard, &TextureSelector), + start_state_set: &mut TextureStateSet, + current_state_set: &mut TextureStateSet, + index32: u32, + 
index: usize, + state_provider: TextureStateProvider<'_>, +) { + let start_simple = unsafe { *start_state_set.simple.get_unchecked(index) }; + + // We only ever need to update the start state here if the state is complex. + // + // If the state is simple, the first insert to the tracker would cover it. + let mut start_complex = None; + if start_simple == TextureUses::COMPLEX { + start_complex = + Some(unsafe { start_state_set.complex.get_mut(&index32).unwrap_unchecked() }); + } + + let current_simple = unsafe { current_state_set.simple.get_unchecked_mut(index) }; + let current_state = if *current_simple == TextureUses::COMPLEX { + SingleOrManyStates::Many(unsafe { + current_state_set + .complex + .get_mut(&index32) + .unwrap_unchecked() + }) + } else { + SingleOrManyStates::Single(current_simple) + }; + + let new_state = unsafe { state_provider.get_state(Some(texture_data), index32, index) }; + + match (current_state, new_state) { + (SingleOrManyStates::Single(current_simple), SingleOrManyStates::Single(new_simple)) => { + *current_simple = new_simple; + } + (SingleOrManyStates::Single(current_simple), SingleOrManyStates::Many(new_many)) => { + // Because we are now demoting this simple state to a complex state, + // we actually need to make a whole new complex state for us to use + // as there wasn't one before. 
+ let mut new_complex = unsafe { + ComplexTextureState::from_selector_state_iter( + texture_data.1.clone(), + iter::once((texture_data.1.clone(), *current_simple)), + ) + }; + + for (selector, mut new_state) in new_many { + if new_state == TextureUses::UNKNOWN { + new_state = *current_simple; + } + for mip in + &mut new_complex.mips[selector.mips.start as usize..selector.mips.end as usize] + { + for &mut (_, ref mut current_layer_state) in + mip.isolate(&selector.layers, TextureUses::UNKNOWN) + { + *current_layer_state = new_state; + } + + mip.coalesce(); + } + } + + *current_simple = TextureUses::COMPLEX; + current_state_set.complex.insert(index32, new_complex); + } + (SingleOrManyStates::Many(current_complex), SingleOrManyStates::Single(new_single)) => { + for (mip_id, mip) in current_complex.mips.iter().enumerate() { + for &(ref layers, current_layer_state) in mip.iter() { + // If this state is unknown, that means that the start is _also_ unknown. + if current_layer_state == TextureUses::UNKNOWN { + if let Some(&mut ref mut start_complex) = start_complex { + strict_assert!(mip_id < start_complex.mips.len()); + + let start_mip = unsafe { start_complex.mips.get_unchecked_mut(mip_id) }; + + for &mut (_, ref mut current_start_state) in + start_mip.isolate(layers, TextureUses::UNKNOWN) + { + strict_assert_eq!(*current_start_state, TextureUses::UNKNOWN); + *current_start_state = new_single; + } + + start_mip.coalesce(); + } + } + } + } + + unsafe { *current_state_set.simple.get_unchecked_mut(index) = new_single }; + unsafe { + current_state_set + .complex + .remove(&index32) + .unwrap_unchecked() + }; + } + (SingleOrManyStates::Many(current_complex), SingleOrManyStates::Many(new_many)) => { + for (selector, new_state) in new_many { + if new_state == TextureUses::UNKNOWN { + // We know nothing new + continue; + } + + for mip_id in selector.mips { + let mip_id = mip_id as usize; + strict_assert!(mip_id < current_complex.mips.len()); + + let mip = unsafe { 
current_complex.mips.get_unchecked_mut(mip_id) }; + + for &mut (ref layers, ref mut current_layer_state) in + mip.isolate(&selector.layers, TextureUses::UNKNOWN) + { + if *current_layer_state == TextureUses::UNKNOWN + && new_state != TextureUses::UNKNOWN + { + // We now know something about this subresource that + // we didn't before so we should go back and update + // the start state. + // + // We know we must have starter state be complex, + // otherwise we would know about this state. + strict_assert!(start_complex.is_some()); + + let start_complex = + unsafe { start_complex.as_deref_mut().unwrap_unchecked() }; + + strict_assert!(mip_id < start_complex.mips.len()); + + let start_mip = unsafe { start_complex.mips.get_unchecked_mut(mip_id) }; + + for &mut (_, ref mut current_start_state) in + start_mip.isolate(layers, TextureUses::UNKNOWN) + { + strict_assert_eq!(*current_start_state, TextureUses::UNKNOWN); + *current_start_state = new_state; + } + + start_mip.coalesce(); + } + + *current_layer_state = new_state; + } + + mip.coalesce(); + } + } + } + } +} diff --git a/third_party/rust/wgpu-core/src/validation.rs b/third_party/rust/wgpu-core/src/validation.rs new file mode 100644 index 0000000000..d190cbbfd9 --- /dev/null +++ b/third_party/rust/wgpu-core/src/validation.rs @@ -0,0 +1,1226 @@ +use crate::{binding_model::BindEntryMap, FastHashMap, FastHashSet}; +use naga::valid::GlobalUse; +use std::{collections::hash_map::Entry, fmt}; +use thiserror::Error; +use wgt::{BindGroupLayoutEntry, BindingType}; + +#[derive(Debug)] +enum ResourceType { + Buffer { + size: wgt::BufferSize, + }, + Texture { + dim: naga::ImageDimension, + arrayed: bool, + class: naga::ImageClass, + }, + Sampler { + comparison: bool, + }, +} + +#[derive(Debug)] +struct Resource { + #[allow(unused)] + name: Option<String>, + bind: naga::ResourceBinding, + ty: ResourceType, + class: naga::AddressSpace, +} + +#[derive(Clone, Copy, Debug)] +enum NumericDimension { + Scalar, + 
Vector(naga::VectorSize), + Matrix(naga::VectorSize, naga::VectorSize), +} + +impl fmt::Display for NumericDimension { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + Self::Scalar => write!(f, ""), + Self::Vector(size) => write!(f, "x{}", size as u8), + Self::Matrix(columns, rows) => write!(f, "x{}{}", columns as u8, rows as u8), + } + } +} + +impl NumericDimension { + fn num_components(&self) -> u32 { + match *self { + Self::Scalar => 1, + Self::Vector(size) => size as u32, + Self::Matrix(w, h) => w as u32 * h as u32, + } + } +} + +#[derive(Clone, Copy, Debug)] +pub struct NumericType { + dim: NumericDimension, + kind: naga::ScalarKind, + width: naga::Bytes, +} + +impl fmt::Display for NumericType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}{}{}", self.kind, self.width * 8, self.dim) + } +} + +#[derive(Clone, Debug)] +pub struct InterfaceVar { + pub ty: NumericType, + interpolation: Option<naga::Interpolation>, + sampling: Option<naga::Sampling>, +} + +impl InterfaceVar { + pub fn vertex_attribute(format: wgt::VertexFormat) -> Self { + InterfaceVar { + ty: NumericType::from_vertex_format(format), + interpolation: None, + sampling: None, + } + } +} + +impl fmt::Display for InterfaceVar { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{} interpolated as {:?} with sampling {:?}", + self.ty, self.interpolation, self.sampling + ) + } +} + +#[derive(Debug)] +enum Varying { + Local { location: u32, iv: InterfaceVar }, + BuiltIn(naga::BuiltIn), +} + +#[allow(unused)] +#[derive(Debug)] +struct SpecializationConstant { + id: u32, + ty: NumericType, +} + +#[derive(Debug, Default)] +struct EntryPoint { + inputs: Vec<Varying>, + outputs: Vec<Varying>, + resources: Vec<(naga::Handle<Resource>, GlobalUse)>, + #[allow(unused)] + spec_constants: Vec<SpecializationConstant>, + sampling_pairs: FastHashSet<(naga::Handle<Resource>, naga::Handle<Resource>)>, + workgroup_size: [u32; 3], +} + 
+#[derive(Debug)] +pub struct Interface { + features: wgt::Features, + limits: wgt::Limits, + resources: naga::Arena<Resource>, + entry_points: FastHashMap<(naga::ShaderStage, String), EntryPoint>, +} + +#[derive(Clone, Debug, Error)] +#[error("buffer usage is {actual:?} which does not contain required usage {expected:?}")] +pub struct MissingBufferUsageError { + pub(crate) actual: wgt::BufferUsages, + pub(crate) expected: wgt::BufferUsages, +} + +/// Checks that the given buffer usage contains the required buffer usage, +/// returns an error otherwise. +pub fn check_buffer_usage( + actual: wgt::BufferUsages, + expected: wgt::BufferUsages, +) -> Result<(), MissingBufferUsageError> { + if !actual.contains(expected) { + Err(MissingBufferUsageError { actual, expected }) + } else { + Ok(()) + } +} + +#[derive(Clone, Debug, Error)] +#[error("texture usage is {actual:?} which does not contain required usage {expected:?}")] +pub struct MissingTextureUsageError { + pub(crate) actual: wgt::TextureUsages, + pub(crate) expected: wgt::TextureUsages, +} + +/// Checks that the given texture usage contains the required texture usage, +/// returns an error otherwise. 
+pub fn check_texture_usage( + actual: wgt::TextureUsages, + expected: wgt::TextureUsages, +) -> Result<(), MissingTextureUsageError> { + if !actual.contains(expected) { + Err(MissingTextureUsageError { actual, expected }) + } else { + Ok(()) + } +} + +#[derive(Clone, Debug, Error)] +pub enum BindingError { + #[error("binding is missing from the pipeline layout")] + Missing, + #[error("visibility flags don't include the shader stage")] + Invisible, + #[error("The shader requires the load/store access flags {required:?} but only {allowed:?} is allowed")] + WrongUsage { + required: GlobalUse, + allowed: GlobalUse, + }, + #[error("type on the shader side does not match the pipeline binding")] + WrongType, + #[error("storage class {binding:?} doesn't match the shader {shader:?}")] + WrongAddressSpace { + binding: naga::AddressSpace, + shader: naga::AddressSpace, + }, + #[error("buffer structure size {0}, added to one element of an unbound array, if it's the last field, ended up greater than the given `min_binding_size`")] + WrongBufferSize(wgt::BufferSize), + #[error("view dimension {dim:?} (is array: {is_array}) doesn't match the binding {binding:?}")] + WrongTextureViewDimension { + dim: naga::ImageDimension, + is_array: bool, + binding: BindingType, + }, + #[error("texture class {binding:?} doesn't match the shader {shader:?}")] + WrongTextureClass { + binding: naga::ImageClass, + shader: naga::ImageClass, + }, + #[error("comparison flag doesn't match the shader")] + WrongSamplerComparison, + #[error("derived bind group layout type is not consistent between stages")] + InconsistentlyDerivedType, + #[error("texture format {0:?} is not supported for storage use")] + BadStorageFormat(wgt::TextureFormat), + #[error( + "storage texture usage {0:?} doesn't have a matching supported `StorageTextureAccess`" + )] + UnsupportedTextureStorageAccess(GlobalUse), +} + +#[derive(Clone, Debug, Error)] +pub enum FilteringError { + #[error("integer textures can't be sampled")] + 
Integer, + #[error("non-filterable float texture")] + NonFilterable, +} + +#[derive(Clone, Debug, Error)] +pub enum InputError { + #[error("input is not provided by the earlier stage in the pipeline")] + Missing, + #[error("input type is not compatible with the provided {0}")] + WrongType(NumericType), + #[error("input interpolation doesn't match provided {0:?}")] + InterpolationMismatch(Option<naga::Interpolation>), + #[error("input sampling doesn't match provided {0:?}")] + SamplingMismatch(Option<naga::Sampling>), +} + +/// Errors produced when validating a programmable stage of a pipeline. +#[derive(Clone, Debug, Error)] +pub enum StageError { + #[error("shader module is invalid")] + InvalidModule, + #[error( + "shader entry point's workgroup size {current:?} ({current_total} total invocations) must be less or equal to the per-dimension limit {limit:?} and the total invocation limit {total}" + )] + InvalidWorkgroupSize { + current: [u32; 3], + current_total: u32, + limit: [u32; 3], + total: u32, + }, + #[error("shader uses {used} inter-stage components above the limit of {limit}")] + TooManyVaryings { used: u32, limit: u32 }, + #[error("unable to find entry point '{0}'")] + MissingEntryPoint(String), + #[error("shader global {0:?} is not available in the layout pipeline layout")] + Binding(naga::ResourceBinding, #[source] BindingError), + #[error("unable to filter the texture ({texture:?}) by the sampler ({sampler:?})")] + Filtering { + texture: naga::ResourceBinding, + sampler: naga::ResourceBinding, + #[source] + error: FilteringError, + }, + #[error("location[{location}] {var} is not provided by the previous stage outputs")] + Input { + location: wgt::ShaderLocation, + var: InterfaceVar, + #[source] + error: InputError, + }, + #[error("location[{location}] is provided by the previous stage output but is not consumed as input by this stage.")] + InputNotConsumed { location: wgt::ShaderLocation }, +} + +fn map_storage_format_to_naga(format: wgt::TextureFormat) 
-> Option<naga::StorageFormat> { + use naga::StorageFormat as Sf; + use wgt::TextureFormat as Tf; + + Some(match format { + Tf::R8Unorm => Sf::R8Unorm, + Tf::R8Snorm => Sf::R8Snorm, + Tf::R8Uint => Sf::R8Uint, + Tf::R8Sint => Sf::R8Sint, + + Tf::R16Uint => Sf::R16Uint, + Tf::R16Sint => Sf::R16Sint, + Tf::R16Float => Sf::R16Float, + Tf::Rg8Unorm => Sf::Rg8Unorm, + Tf::Rg8Snorm => Sf::Rg8Snorm, + Tf::Rg8Uint => Sf::Rg8Uint, + Tf::Rg8Sint => Sf::Rg8Sint, + + Tf::R32Uint => Sf::R32Uint, + Tf::R32Sint => Sf::R32Sint, + Tf::R32Float => Sf::R32Float, + Tf::Rg16Uint => Sf::Rg16Uint, + Tf::Rg16Sint => Sf::Rg16Sint, + Tf::Rg16Float => Sf::Rg16Float, + Tf::Rgba8Unorm => Sf::Rgba8Unorm, + Tf::Rgba8Snorm => Sf::Rgba8Snorm, + Tf::Rgba8Uint => Sf::Rgba8Uint, + Tf::Rgba8Sint => Sf::Rgba8Sint, + + Tf::Rgb10a2Unorm => Sf::Rgb10a2Unorm, + Tf::Rg11b10Float => Sf::Rg11b10Float, + + Tf::Rg32Uint => Sf::Rg32Uint, + Tf::Rg32Sint => Sf::Rg32Sint, + Tf::Rg32Float => Sf::Rg32Float, + Tf::Rgba16Uint => Sf::Rgba16Uint, + Tf::Rgba16Sint => Sf::Rgba16Sint, + Tf::Rgba16Float => Sf::Rgba16Float, + + Tf::Rgba32Uint => Sf::Rgba32Uint, + Tf::Rgba32Sint => Sf::Rgba32Sint, + Tf::Rgba32Float => Sf::Rgba32Float, + + _ => return None, + }) +} + +fn map_storage_format_from_naga(format: naga::StorageFormat) -> wgt::TextureFormat { + use naga::StorageFormat as Sf; + use wgt::TextureFormat as Tf; + + match format { + Sf::R8Unorm => Tf::R8Unorm, + Sf::R8Snorm => Tf::R8Snorm, + Sf::R8Uint => Tf::R8Uint, + Sf::R8Sint => Tf::R8Sint, + + Sf::R16Uint => Tf::R16Uint, + Sf::R16Sint => Tf::R16Sint, + Sf::R16Float => Tf::R16Float, + Sf::Rg8Unorm => Tf::Rg8Unorm, + Sf::Rg8Snorm => Tf::Rg8Snorm, + Sf::Rg8Uint => Tf::Rg8Uint, + Sf::Rg8Sint => Tf::Rg8Sint, + + Sf::R32Uint => Tf::R32Uint, + Sf::R32Sint => Tf::R32Sint, + Sf::R32Float => Tf::R32Float, + Sf::Rg16Uint => Tf::Rg16Uint, + Sf::Rg16Sint => Tf::Rg16Sint, + Sf::Rg16Float => Tf::Rg16Float, + Sf::Rgba8Unorm => Tf::Rgba8Unorm, + Sf::Rgba8Snorm => Tf::Rgba8Snorm, + 
Sf::Rgba8Uint => Tf::Rgba8Uint, + Sf::Rgba8Sint => Tf::Rgba8Sint, + + Sf::Rgb10a2Unorm => Tf::Rgb10a2Unorm, + Sf::Rg11b10Float => Tf::Rg11b10Float, + + Sf::Rg32Uint => Tf::Rg32Uint, + Sf::Rg32Sint => Tf::Rg32Sint, + Sf::Rg32Float => Tf::Rg32Float, + Sf::Rgba16Uint => Tf::Rgba16Uint, + Sf::Rgba16Sint => Tf::Rgba16Sint, + Sf::Rgba16Float => Tf::Rgba16Float, + + Sf::Rgba32Uint => Tf::Rgba32Uint, + Sf::Rgba32Sint => Tf::Rgba32Sint, + Sf::Rgba32Float => Tf::Rgba32Float, + } +} + +impl Resource { + fn check_binding_use( + &self, + entry: &BindGroupLayoutEntry, + shader_usage: GlobalUse, + ) -> Result<(), BindingError> { + let allowed_usage = match self.ty { + ResourceType::Buffer { size } => { + let (allowed_usage, min_size) = match entry.ty { + BindingType::Buffer { + ty, + has_dynamic_offset: _, + min_binding_size, + } => { + let (class, global_use) = match ty { + wgt::BufferBindingType::Uniform => { + (naga::AddressSpace::Uniform, GlobalUse::READ) + } + wgt::BufferBindingType::Storage { read_only } => { + let mut global_use = GlobalUse::READ | GlobalUse::QUERY; + global_use.set(GlobalUse::WRITE, !read_only); + let mut naga_access = naga::StorageAccess::LOAD; + naga_access.set(naga::StorageAccess::STORE, !read_only); + ( + naga::AddressSpace::Storage { + access: naga_access, + }, + global_use, + ) + } + }; + if self.class != class { + return Err(BindingError::WrongAddressSpace { + binding: class, + shader: self.class, + }); + } + (global_use, min_binding_size) + } + _ => return Err(BindingError::WrongType), + }; + match min_size { + Some(non_zero) if non_zero < size => { + return Err(BindingError::WrongBufferSize(size)) + } + _ => (), + } + allowed_usage + } + ResourceType::Sampler { comparison } => match entry.ty { + BindingType::Sampler(ty) => { + if (ty == wgt::SamplerBindingType::Comparison) == comparison { + GlobalUse::READ + } else { + return Err(BindingError::WrongSamplerComparison); + } + } + _ => return Err(BindingError::WrongType), + }, + 
ResourceType::Texture { + dim, + arrayed, + class, + } => { + let view_dimension = match entry.ty { + BindingType::Texture { view_dimension, .. } + | BindingType::StorageTexture { view_dimension, .. } => view_dimension, + _ => { + return Err(BindingError::WrongTextureViewDimension { + dim, + is_array: false, + binding: entry.ty, + }) + } + }; + if arrayed { + match (dim, view_dimension) { + (naga::ImageDimension::D2, wgt::TextureViewDimension::D2Array) => (), + (naga::ImageDimension::Cube, wgt::TextureViewDimension::CubeArray) => (), + _ => { + return Err(BindingError::WrongTextureViewDimension { + dim, + is_array: true, + binding: entry.ty, + }) + } + } + } else { + match (dim, view_dimension) { + (naga::ImageDimension::D1, wgt::TextureViewDimension::D1) => (), + (naga::ImageDimension::D2, wgt::TextureViewDimension::D2) => (), + (naga::ImageDimension::D3, wgt::TextureViewDimension::D3) => (), + (naga::ImageDimension::Cube, wgt::TextureViewDimension::Cube) => (), + _ => { + return Err(BindingError::WrongTextureViewDimension { + dim, + is_array: false, + binding: entry.ty, + }) + } + } + } + let (expected_class, usage) = match entry.ty { + BindingType::Texture { + sample_type, + view_dimension: _, + multisampled: multi, + } => { + let class = match sample_type { + wgt::TextureSampleType::Float { .. 
} => naga::ImageClass::Sampled { + kind: naga::ScalarKind::Float, + multi, + }, + wgt::TextureSampleType::Sint => naga::ImageClass::Sampled { + kind: naga::ScalarKind::Sint, + multi, + }, + wgt::TextureSampleType::Uint => naga::ImageClass::Sampled { + kind: naga::ScalarKind::Uint, + multi, + }, + wgt::TextureSampleType::Depth => naga::ImageClass::Depth { multi }, + }; + (class, GlobalUse::READ | GlobalUse::QUERY) + } + BindingType::StorageTexture { + access, + format, + view_dimension: _, + } => { + let naga_format = map_storage_format_to_naga(format) + .ok_or(BindingError::BadStorageFormat(format))?; + let (naga_access, usage) = match access { + wgt::StorageTextureAccess::ReadOnly => ( + naga::StorageAccess::LOAD, + GlobalUse::READ | GlobalUse::QUERY, + ), + wgt::StorageTextureAccess::WriteOnly => ( + naga::StorageAccess::STORE, + GlobalUse::WRITE | GlobalUse::QUERY, + ), + wgt::StorageTextureAccess::ReadWrite => { + (naga::StorageAccess::all(), GlobalUse::all()) + } + }; + ( + naga::ImageClass::Storage { + format: naga_format, + access: naga_access, + }, + usage, + ) + } + _ => return Err(BindingError::WrongType), + }; + if class != expected_class { + return Err(BindingError::WrongTextureClass { + binding: expected_class, + shader: class, + }); + } + usage + } + }; + + if allowed_usage.contains(shader_usage) { + Ok(()) + } else { + Err(BindingError::WrongUsage { + required: shader_usage, + allowed: allowed_usage, + }) + } + } + + fn derive_binding_type( + &self, + shader_usage: GlobalUse, + features: wgt::Features, + ) -> Result<BindingType, BindingError> { + Ok(match self.ty { + ResourceType::Buffer { size } => BindingType::Buffer { + ty: match self.class { + naga::AddressSpace::Uniform => wgt::BufferBindingType::Uniform, + naga::AddressSpace::Storage { .. 
} => wgt::BufferBindingType::Storage { + read_only: !shader_usage.contains(GlobalUse::WRITE), + }, + _ => return Err(BindingError::WrongType), + }, + has_dynamic_offset: false, + min_binding_size: Some(size), + }, + ResourceType::Sampler { comparison } => BindingType::Sampler(if comparison { + wgt::SamplerBindingType::Comparison + } else { + wgt::SamplerBindingType::Filtering + }), + ResourceType::Texture { + dim, + arrayed, + class, + } => { + let view_dimension = match dim { + naga::ImageDimension::D1 => wgt::TextureViewDimension::D1, + naga::ImageDimension::D2 if arrayed => wgt::TextureViewDimension::D2Array, + naga::ImageDimension::D2 => wgt::TextureViewDimension::D2, + naga::ImageDimension::D3 => wgt::TextureViewDimension::D3, + naga::ImageDimension::Cube if arrayed => wgt::TextureViewDimension::CubeArray, + naga::ImageDimension::Cube => wgt::TextureViewDimension::Cube, + }; + match class { + naga::ImageClass::Sampled { multi, kind } => BindingType::Texture { + sample_type: match kind { + naga::ScalarKind::Float => { + wgt::TextureSampleType::Float { filterable: true } + } + naga::ScalarKind::Sint => wgt::TextureSampleType::Sint, + naga::ScalarKind::Uint => wgt::TextureSampleType::Uint, + naga::ScalarKind::Bool => unreachable!(), + }, + view_dimension, + multisampled: multi, + }, + naga::ImageClass::Depth { multi } => BindingType::Texture { + sample_type: wgt::TextureSampleType::Depth, + view_dimension, + multisampled: multi, + }, + naga::ImageClass::Storage { format, .. 
} => BindingType::StorageTexture { + access: if !shader_usage.contains(GlobalUse::READ) { + wgt::StorageTextureAccess::WriteOnly + } else if !features + .contains(wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES) + { + return Err(BindingError::UnsupportedTextureStorageAccess( + shader_usage, + )); + } else if shader_usage.contains(GlobalUse::WRITE) { + wgt::StorageTextureAccess::ReadWrite + } else { + wgt::StorageTextureAccess::ReadOnly + }, + view_dimension, + format: { + let f = map_storage_format_from_naga(format); + let original = map_storage_format_to_naga(f) + .ok_or(BindingError::BadStorageFormat(f))?; + debug_assert_eq!(format, original); + f + }, + }, + } + } + }) + } +} + +impl NumericType { + fn from_vertex_format(format: wgt::VertexFormat) -> Self { + use naga::{ScalarKind as Sk, VectorSize as Vs}; + use wgt::VertexFormat as Vf; + + let (dim, kind, width) = match format { + Vf::Uint32 => (NumericDimension::Scalar, Sk::Uint, 4), + Vf::Uint8x2 | Vf::Uint16x2 | Vf::Uint32x2 => { + (NumericDimension::Vector(Vs::Bi), Sk::Uint, 4) + } + Vf::Uint32x3 => (NumericDimension::Vector(Vs::Tri), Sk::Uint, 4), + Vf::Uint8x4 | Vf::Uint16x4 | Vf::Uint32x4 => { + (NumericDimension::Vector(Vs::Quad), Sk::Uint, 4) + } + Vf::Sint32 => (NumericDimension::Scalar, Sk::Sint, 4), + Vf::Sint8x2 | Vf::Sint16x2 | Vf::Sint32x2 => { + (NumericDimension::Vector(Vs::Bi), Sk::Sint, 4) + } + Vf::Sint32x3 => (NumericDimension::Vector(Vs::Tri), Sk::Sint, 4), + Vf::Sint8x4 | Vf::Sint16x4 | Vf::Sint32x4 => { + (NumericDimension::Vector(Vs::Quad), Sk::Sint, 4) + } + Vf::Float32 => (NumericDimension::Scalar, Sk::Float, 4), + Vf::Unorm8x2 + | Vf::Snorm8x2 + | Vf::Unorm16x2 + | Vf::Snorm16x2 + | Vf::Float16x2 + | Vf::Float32x2 => (NumericDimension::Vector(Vs::Bi), Sk::Float, 4), + Vf::Float32x3 => (NumericDimension::Vector(Vs::Tri), Sk::Float, 4), + Vf::Unorm8x4 + | Vf::Snorm8x4 + | Vf::Unorm16x4 + | Vf::Snorm16x4 + | Vf::Float16x4 + | Vf::Float32x4 => (NumericDimension::Vector(Vs::Quad), 
Sk::Float, 4), + Vf::Float64 => (NumericDimension::Scalar, Sk::Float, 8), + Vf::Float64x2 => (NumericDimension::Vector(Vs::Bi), Sk::Float, 8), + Vf::Float64x3 => (NumericDimension::Vector(Vs::Tri), Sk::Float, 8), + Vf::Float64x4 => (NumericDimension::Vector(Vs::Quad), Sk::Float, 8), + }; + + NumericType { + dim, + kind, + //Note: Shader always sees data as int, uint, or float. + // It doesn't know if the original is normalized in a tighter form. + width, + } + } + + fn from_texture_format(format: wgt::TextureFormat) -> Self { + use naga::{ScalarKind as Sk, VectorSize as Vs}; + use wgt::TextureFormat as Tf; + + let (dim, kind) = match format { + Tf::R8Unorm | Tf::R8Snorm | Tf::R16Float | Tf::R32Float => { + (NumericDimension::Scalar, Sk::Float) + } + Tf::R8Uint | Tf::R16Uint | Tf::R32Uint => (NumericDimension::Scalar, Sk::Uint), + Tf::R8Sint | Tf::R16Sint | Tf::R32Sint => (NumericDimension::Scalar, Sk::Sint), + Tf::Rg8Unorm | Tf::Rg8Snorm | Tf::Rg16Float | Tf::Rg32Float => { + (NumericDimension::Vector(Vs::Bi), Sk::Float) + } + Tf::Rg8Uint | Tf::Rg16Uint | Tf::Rg32Uint => { + (NumericDimension::Vector(Vs::Bi), Sk::Uint) + } + Tf::Rg8Sint | Tf::Rg16Sint | Tf::Rg32Sint => { + (NumericDimension::Vector(Vs::Bi), Sk::Sint) + } + Tf::R16Snorm | Tf::R16Unorm => (NumericDimension::Scalar, Sk::Float), + Tf::Rg16Snorm | Tf::Rg16Unorm => (NumericDimension::Vector(Vs::Bi), Sk::Float), + Tf::Rgba16Snorm | Tf::Rgba16Unorm => (NumericDimension::Vector(Vs::Quad), Sk::Float), + Tf::Rgba8Unorm + | Tf::Rgba8UnormSrgb + | Tf::Rgba8Snorm + | Tf::Bgra8Unorm + | Tf::Bgra8UnormSrgb + | Tf::Rgb10a2Unorm + | Tf::Rgba16Float + | Tf::Rgba32Float => (NumericDimension::Vector(Vs::Quad), Sk::Float), + Tf::Rgba8Uint | Tf::Rgba16Uint | Tf::Rgba32Uint => { + (NumericDimension::Vector(Vs::Quad), Sk::Uint) + } + Tf::Rgba8Sint | Tf::Rgba16Sint | Tf::Rgba32Sint => { + (NumericDimension::Vector(Vs::Quad), Sk::Sint) + } + Tf::Rg11b10Float => (NumericDimension::Vector(Vs::Tri), Sk::Float), + Tf::Stencil8 + 
| Tf::Depth16Unorm + | Tf::Depth32Float + | Tf::Depth32FloatStencil8 + | Tf::Depth24Plus + | Tf::Depth24PlusStencil8 => { + panic!("Unexpected depth format") + } + Tf::Rgb9e5Ufloat => (NumericDimension::Vector(Vs::Tri), Sk::Float), + Tf::Bc1RgbaUnorm + | Tf::Bc1RgbaUnormSrgb + | Tf::Bc2RgbaUnorm + | Tf::Bc2RgbaUnormSrgb + | Tf::Bc3RgbaUnorm + | Tf::Bc3RgbaUnormSrgb + | Tf::Bc7RgbaUnorm + | Tf::Bc7RgbaUnormSrgb + | Tf::Etc2Rgb8A1Unorm + | Tf::Etc2Rgb8A1UnormSrgb + | Tf::Etc2Rgba8Unorm + | Tf::Etc2Rgba8UnormSrgb => (NumericDimension::Vector(Vs::Quad), Sk::Float), + Tf::Bc4RUnorm | Tf::Bc4RSnorm | Tf::EacR11Unorm | Tf::EacR11Snorm => { + (NumericDimension::Scalar, Sk::Float) + } + Tf::Bc5RgUnorm | Tf::Bc5RgSnorm | Tf::EacRg11Unorm | Tf::EacRg11Snorm => { + (NumericDimension::Vector(Vs::Bi), Sk::Float) + } + Tf::Bc6hRgbUfloat | Tf::Bc6hRgbSfloat | Tf::Etc2Rgb8Unorm | Tf::Etc2Rgb8UnormSrgb => { + (NumericDimension::Vector(Vs::Tri), Sk::Float) + } + Tf::Astc { + block: _, + channel: _, + } => (NumericDimension::Vector(Vs::Quad), Sk::Float), + }; + + NumericType { + dim, + kind, + //Note: Shader always sees data as int, uint, or float. + // It doesn't know if the original is normalized in a tighter form. 
+ width: 4, + } + } + + fn is_subtype_of(&self, other: &NumericType) -> bool { + if self.width > other.width { + return false; + } + if self.kind != other.kind { + return false; + } + match (self.dim, other.dim) { + (NumericDimension::Scalar, NumericDimension::Scalar) => true, + (NumericDimension::Scalar, NumericDimension::Vector(_)) => true, + (NumericDimension::Vector(s0), NumericDimension::Vector(s1)) => s0 <= s1, + (NumericDimension::Matrix(c0, r0), NumericDimension::Matrix(c1, r1)) => { + c0 == c1 && r0 == r1 + } + _ => false, + } + } + + fn is_compatible_with(&self, other: &NumericType) -> bool { + if self.kind != other.kind { + return false; + } + match (self.dim, other.dim) { + (NumericDimension::Scalar, NumericDimension::Scalar) => true, + (NumericDimension::Scalar, NumericDimension::Vector(_)) => true, + (NumericDimension::Vector(_), NumericDimension::Vector(_)) => true, + (NumericDimension::Matrix(..), NumericDimension::Matrix(..)) => true, + _ => false, + } + } +} + +/// Return true if the fragment `format` is covered by the provided `output`. 
+pub fn check_texture_format( + format: wgt::TextureFormat, + output: &NumericType, +) -> Result<(), NumericType> { + let nt = NumericType::from_texture_format(format); + if nt.is_subtype_of(output) { + Ok(()) + } else { + Err(nt) + } +} + +pub type StageIo = FastHashMap<wgt::ShaderLocation, InterfaceVar>; + +impl Interface { + fn populate( + list: &mut Vec<Varying>, + binding: Option<&naga::Binding>, + ty: naga::Handle<naga::Type>, + arena: &naga::UniqueArena<naga::Type>, + ) { + let numeric_ty = match arena[ty].inner { + naga::TypeInner::Scalar { kind, width } => NumericType { + dim: NumericDimension::Scalar, + kind, + width, + }, + naga::TypeInner::Vector { size, kind, width } => NumericType { + dim: NumericDimension::Vector(size), + kind, + width, + }, + naga::TypeInner::Matrix { + columns, + rows, + width, + } => NumericType { + dim: NumericDimension::Matrix(columns, rows), + kind: naga::ScalarKind::Float, + width, + }, + naga::TypeInner::Struct { ref members, .. } => { + for member in members { + Self::populate(list, member.binding.as_ref(), member.ty, arena); + } + return; + } + ref other => { + //Note: technically this should be at least `log::error`, but + // the reality is - every shader coming from `glslc` outputs an array + // of clip distances and hits this path :( + // So we lower it to `log::warn` to be less annoying. 
+ log::warn!("Unexpected varying type: {:?}", other); + return; + } + }; + + let varying = match binding { + Some(&naga::Binding::Location { + location, + interpolation, + sampling, + }) => Varying::Local { + location, + iv: InterfaceVar { + ty: numeric_ty, + interpolation, + sampling, + }, + }, + Some(&naga::Binding::BuiltIn(built_in)) => Varying::BuiltIn(built_in), + None => { + log::error!("Missing binding for a varying"); + return; + } + }; + list.push(varying); + } + + pub fn new( + module: &naga::Module, + info: &naga::valid::ModuleInfo, + features: wgt::Features, + limits: wgt::Limits, + ) -> Self { + let mut resources = naga::Arena::new(); + let mut resource_mapping = FastHashMap::default(); + for (var_handle, var) in module.global_variables.iter() { + let bind = match var.binding { + Some(ref br) => br.clone(), + _ => continue, + }; + let naga_ty = &module.types[var.ty].inner; + + let inner_ty = match *naga_ty { + naga::TypeInner::BindingArray { base, .. } => &module.types[base].inner, + ref ty => ty, + }; + + let ty = match *inner_ty { + naga::TypeInner::Image { + dim, + arrayed, + class, + } => ResourceType::Texture { + dim, + arrayed, + class, + }, + naga::TypeInner::Sampler { comparison } => ResourceType::Sampler { comparison }, + naga::TypeInner::Array { stride, .. 
} => ResourceType::Buffer {
                    size: wgt::BufferSize::new(stride as u64).unwrap(),
                },
                ref other => ResourceType::Buffer {
                    size: wgt::BufferSize::new(other.size(&module.constants) as u64).unwrap(),
                },
            };
            let handle = resources.append(
                Resource {
                    name: var.name.clone(),
                    bind,
                    ty,
                    class: var.space,
                },
                Default::default(),
            );
            resource_mapping.insert(var_handle, handle);
        }

        let mut entry_points = FastHashMap::default();
        entry_points.reserve(module.entry_points.len());
        for (index, entry_point) in module.entry_points.iter().enumerate() {
            let info = info.get_entry_point(index);
            let mut ep = EntryPoint::default();
            for arg in entry_point.function.arguments.iter() {
                Self::populate(&mut ep.inputs, arg.binding.as_ref(), arg.ty, &module.types);
            }
            if let Some(ref result) = entry_point.function.result {
                Self::populate(
                    &mut ep.outputs,
                    result.binding.as_ref(),
                    result.ty,
                    &module.types,
                );
            }

            for (var_handle, var) in module.global_variables.iter() {
                let usage = info[var_handle];
                if usage.is_empty() {
                    continue;
                }
                if var.binding.is_some() {
                    ep.resources.push((resource_mapping[&var_handle], usage));
                }
            }

            for key in info.sampling_set.iter() {
                ep.sampling_pairs
                    .insert((resource_mapping[&key.image], resource_mapping[&key.sampler]));
            }

            ep.workgroup_size = entry_point.workgroup_size;

            entry_points.insert((entry_point.stage, entry_point.name.clone()), ep);
        }

        Self {
            features,
            limits,
            resources,
            entry_points,
        }
    }

    /// Validates one pipeline stage of this shader interface and returns the
    /// stage's user-defined outputs.
    ///
    /// The checks run in this order, and the first failure is returned:
    /// 1. the entry point `(stage, entry_point_name)` exists;
    /// 2. every resource used by the entry point matches the bind group
    ///    layouts (or is recorded into the derived layouts);
    /// 3. every sampled texture/sampler pair is compatible (only when layouts
    ///    are given);
    /// 4. compute workgroup sizes are within `self.limits`;
    /// 5. every `Varying::Local` input of the entry point is provided by
    ///    `inputs` with a compatible type;
    /// 6. for fragment stages, every location in `inputs` is consumed;
    /// 7. the number of inter-stage components is within the limit.
    ///
    /// # Parameters
    ///
    /// * `given_layouts` - explicit bind group layouts, indexed by group. When
    ///   `Some`, resources are validated against them and
    ///   `shader_binding_sizes` is updated. When `None`, the layouts are
    ///   derived from the shader into `derived_layouts` instead.
    /// * `derived_layouts` - per-group entry maps; only written when
    ///   `given_layouts` is `None`. A resource whose group index is out of
    ///   range yields `BindingError::Missing`.
    /// * `shader_binding_sizes` - for each buffer binding, the largest size
    ///   seen so far; only written when `given_layouts` is `Some`.
    /// * `entry_point_name` - name of the entry point to look up.
    /// * `stage_bit` - must be exactly one of `VERTEX`, `FRAGMENT` or
    ///   `COMPUTE`.
    /// * `inputs` - varyings provided to this stage, looked up by location.
    /// * `compare_function` - only used to log a warning for vertex shaders
    ///   whose position output is not invariant when the comparison is
    ///   `Equal` or `NotEqual`; it never causes an error.
    ///
    /// # Errors
    ///
    /// Returns `StageError::MissingEntryPoint`, `Binding`, `Filtering`,
    /// `InvalidWorkgroupSize`, `Input`, `InputNotConsumed` or
    /// `TooManyVaryings`, matching the checks listed above.
    ///
    /// # Panics
    ///
    /// Panics if `stage_bit` is not a single supported stage. The
    /// texture/sampler check indexes `given_layouts` directly and asserts
    /// visibility; that is presumably guaranteed by the resource check that
    /// runs before it (NOTE(review): confirm that every sampled image and
    /// sampler is also listed in the entry point's `resources`).
    pub fn check_stage(
        &self,
        given_layouts: Option<&[&BindEntryMap]>,
        derived_layouts: &mut [BindEntryMap],
        shader_binding_sizes: &mut FastHashMap<naga::ResourceBinding, wgt::BufferSize>,
        entry_point_name: &str,
        stage_bit: wgt::ShaderStages,
        inputs: StageIo,
        compare_function: Option<wgt::CompareFunction>,
    ) -> Result<StageIo, StageError> {
        // Since a shader module can have multiple entry points with the same name,
        // we need to look for one with the right execution model.
        let shader_stage = match stage_bit {
            wgt::ShaderStages::VERTEX => naga::ShaderStage::Vertex,
            wgt::ShaderStages::FRAGMENT => naga::ShaderStage::Fragment,
            wgt::ShaderStages::COMPUTE => naga::ShaderStage::Compute,
            _ => unreachable!(),
        };
        let pair = (shader_stage, entry_point_name.to_string());
        let entry_point = self
            .entry_points
            .get(&pair)
            .ok_or(StageError::MissingEntryPoint(pair.1))?;

        // check resources visibility
        for &(handle, usage) in entry_point.resources.iter() {
            let res = &self.resources[handle];
            let result = match given_layouts {
                Some(layouts) => {
                    // update the required binding size for this buffer
                    if let ResourceType::Buffer { size } = res.ty {
                        match shader_binding_sizes.entry(res.bind.clone()) {
                            Entry::Occupied(e) => {
                                *e.into_mut() = size.max(*e.get());
                            }
                            Entry::Vacant(e) => {
                                e.insert(size);
                            }
                        }
                    }
                    layouts
                        .get(res.bind.group as usize)
                        .and_then(|map| map.get(&res.bind.binding))
                        .ok_or(BindingError::Missing)
                        .and_then(|entry| {
                            if entry.visibility.contains(stage_bit) {
                                Ok(entry)
                            } else {
                                Err(BindingError::Invisible)
                            }
                        })
                        .and_then(|entry| res.check_binding_use(entry, usage))
                }
                None => derived_layouts
                    .get_mut(res.bind.group as usize)
                    .ok_or(BindingError::Missing)
                    .and_then(|set| {
                        let ty = res.derive_binding_type(usage, self.features)?;
                        match set.entry(res.bind.binding) {
                            // Another stage already derived a different type
                            // for this binding slot.
                            Entry::Occupied(e) if e.get().ty != ty => {
                                return Err(BindingError::InconsistentlyDerivedType)
                            }
                            Entry::Occupied(e) => {
                                e.into_mut().visibility |= stage_bit;
                            }
                            Entry::Vacant(e) => {
                                e.insert(BindGroupLayoutEntry {
                                    binding: res.bind.binding,
                                    ty,
                                    visibility: stage_bit,
                                    count: None,
                                });
                            }
                        }
                        Ok(())
                    }),
            };
            if let Err(error) = result {
                return Err(StageError::Binding(res.bind.clone(), error));
            }
        }

        // check the compatibility between textures and samplers
        if let Some(layouts) = given_layouts {
            for &(texture_handle, sampler_handle) in entry_point.sampling_pairs.iter() {
                let texture_bind = &self.resources[texture_handle].bind;
                let sampler_bind = &self.resources[sampler_handle].bind;
                let texture_layout = &layouts[texture_bind.group as usize][&texture_bind.binding];
                let sampler_layout = &layouts[sampler_bind.group as usize][&sampler_bind.binding];
                assert!(texture_layout.visibility.contains(stage_bit));
                assert!(sampler_layout.visibility.contains(stage_bit));

                let error = match texture_layout.ty {
                    wgt::BindingType::Texture {
                        sample_type: wgt::TextureSampleType::Float { filterable },
                        ..
                    } => match sampler_layout.ty {
                        wgt::BindingType::Sampler(wgt::SamplerBindingType::Filtering)
                            if !filterable =>
                        {
                            Some(FilteringError::NonFilterable)
                        }
                        _ => None,
                    },
                    wgt::BindingType::Texture {
                        sample_type: wgt::TextureSampleType::Sint,
                        ..
                    }
                    | wgt::BindingType::Texture {
                        sample_type: wgt::TextureSampleType::Uint,
                        ..
                    } => Some(FilteringError::Integer),
                    _ => None, // unreachable, really
                };

                if let Some(error) = error {
                    return Err(StageError::Filtering {
                        texture: texture_bind.clone(),
                        sampler: sampler_bind.clone(),
                        error,
                    });
                }
            }
        }

        // check workgroup size limits
        if shader_stage == naga::ShaderStage::Compute {
            let max_workgroup_size_limits = [
                self.limits.max_compute_workgroup_size_x,
                self.limits.max_compute_workgroup_size_y,
                self.limits.max_compute_workgroup_size_z,
            ];
            // The workgroup size is copied from the shader module's entry
            // point, so the product may not fit in a `u32`.
            // `Iterator::product` would panic on overflow when overflow checks
            // are enabled and wrap otherwise; multiply with explicit overflow
            // detection and treat an overflow as exceeding the limit.
            let total_invocations = entry_point
                .workgroup_size
                .iter()
                .try_fold(1u32, |acc, &size| acc.checked_mul(size));
            let too_many_invocations = total_invocations.map_or(true, |total| {
                total > self.limits.max_compute_invocations_per_workgroup
            });

            if entry_point.workgroup_size.iter().any(|&s| s == 0)
                || too_many_invocations
                || entry_point.workgroup_size[0] > max_workgroup_size_limits[0]
                || entry_point.workgroup_size[1] > max_workgroup_size_limits[1]
                || entry_point.workgroup_size[2] > max_workgroup_size_limits[2]
            {
                return Err(StageError::InvalidWorkgroupSize {
                    current: entry_point.workgroup_size,
                    // `u32::MAX` stands in for a total that overflowed `u32`.
                    current_total: total_invocations.unwrap_or(u32::MAX),
                    limit: max_workgroup_size_limits,
                    total: self.limits.max_compute_invocations_per_workgroup,
                });
            }
        }

        let mut inter_stage_components = 0;

        // check inputs compatibility
        for input in entry_point.inputs.iter() {
            match *input {
                Varying::Local { location, ref iv } => {
                    let result =
                        inputs
                            .get(&location)
                            .ok_or(InputError::Missing)
                            .and_then(|provided| {
                                let (compatible, num_components) = match shader_stage {
                                    // For vertex attributes, there are defaults filled out
                                    // by the driver if data is not provided.
                                    naga::ShaderStage::Vertex => {
                                        // vertex inputs don't count towards inter-stage
                                        (iv.ty.is_compatible_with(&provided.ty), 0)
                                    }
                                    naga::ShaderStage::Fragment => {
                                        if iv.interpolation != provided.interpolation {
                                            return Err(InputError::InterpolationMismatch(
                                                provided.interpolation,
                                            ));
                                        }
                                        if iv.sampling != provided.sampling {
                                            return Err(InputError::SamplingMismatch(
                                                provided.sampling,
                                            ));
                                        }
                                        (
                                            iv.ty.is_subtype_of(&provided.ty),
                                            iv.ty.dim.num_components(),
                                        )
                                    }
                                    // Any located input on a compute entry point
                                    // is reported as a type mismatch.
                                    naga::ShaderStage::Compute => (false, 0),
                                };
                                if compatible {
                                    Ok(num_components)
                                } else {
                                    Err(InputError::WrongType(provided.ty))
                                }
                            });
                    match result {
                        Ok(num_components) => {
                            inter_stage_components += num_components;
                        }
                        Err(error) => {
                            return Err(StageError::Input {
                                location,
                                var: iv.clone(),
                                error,
                            })
                        }
                    }
                }
                Varying::BuiltIn(_) => {}
            }
        }

        // Check all vertex outputs and make sure the fragment shader consumes them.
        if shader_stage == naga::ShaderStage::Fragment {
            for &index in inputs.keys() {
                // This is a linear scan, but the count should be low enough
                // that this should be fine.
                let found = entry_point.inputs.iter().any(|v| match *v {
                    Varying::Local { location, .. } => location == index,
                    Varying::BuiltIn(_) => false,
                });

                if !found {
                    return Err(StageError::InputNotConsumed { location: index });
                }
            }
        }

        if shader_stage == naga::ShaderStage::Vertex {
            for output in entry_point.outputs.iter() {
                //TODO: count builtins towards the limit?
                inter_stage_components += match *output {
                    Varying::Local { ref iv, .. } => iv.ty.dim.num_components(),
                    Varying::BuiltIn(_) => 0,
                };

                if let Some(
                    cmp @ wgt::CompareFunction::Equal | cmp @ wgt::CompareFunction::NotEqual,
                ) = compare_function
                {
                    if let Varying::BuiltIn(naga::BuiltIn::Position { invariant: false }) = *output
                    {
                        log::warn!(
                            "Vertex shader with entry point {entry_point_name} outputs a @builtin(position) without the @invariant \
                            attribute and is used in a pipeline with {cmp:?}. On some machines, this can cause bad artifacting as {cmp:?} assumes \
                            the values output from the vertex shader exactly match the value in the depth buffer. The @invariant attribute on the \
                            @builtin(position) vertex output ensures that the exact same pixel depths are used every render."
                        );
                    }
                }
            }
        }

        if inter_stage_components > self.limits.max_inter_stage_shader_components {
            return Err(StageError::TooManyVaryings {
                used: inter_stage_components,
                limit: self.limits.max_inter_stage_shader_components,
            });
        }

        // Only user-defined (located) outputs are handed to the next stage;
        // builtins are dropped.
        let outputs = entry_point
            .outputs
            .iter()
            .filter_map(|output| match *output {
                Varying::Local { location, ref iv } => Some((location, iv.clone())),
                Varying::BuiltIn(_) => None,
            })
            .collect();
        Ok(outputs)
    }
}