From 2aa4a82499d4becd2284cdb482213d541b8804dd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 28 Apr 2024 16:29:10 +0200 Subject: Adding upstream version 86.0.1. Signed-off-by: Daniel Baumann --- gfx/wgpu/wgpu-core/Cargo.toml | 68 + gfx/wgpu/wgpu-core/build.rs | 20 + gfx/wgpu/wgpu-core/src/binding_model.rs | 632 ++++ gfx/wgpu/wgpu-core/src/command/allocator.rs | 268 ++ gfx/wgpu/wgpu-core/src/command/bind.rs | 295 ++ gfx/wgpu/wgpu-core/src/command/bundle.rs | 1230 ++++++++ gfx/wgpu/wgpu-core/src/command/compute.rs | 657 +++++ gfx/wgpu/wgpu-core/src/command/draw.rs | 180 ++ gfx/wgpu/wgpu-core/src/command/mod.rs | 362 +++ gfx/wgpu/wgpu-core/src/command/render.rs | 2078 +++++++++++++ gfx/wgpu/wgpu-core/src/command/transfer.rs | 789 +++++ gfx/wgpu/wgpu-core/src/conv.rs | 833 ++++++ gfx/wgpu/wgpu-core/src/device/alloc.rs | 294 ++ gfx/wgpu/wgpu-core/src/device/descriptor.rs | 168 ++ gfx/wgpu/wgpu-core/src/device/life.rs | 760 +++++ gfx/wgpu/wgpu-core/src/device/mod.rs | 4217 +++++++++++++++++++++++++++ gfx/wgpu/wgpu-core/src/device/queue.rs | 696 +++++ gfx/wgpu/wgpu-core/src/device/trace.rs | 192 ++ gfx/wgpu/wgpu-core/src/hub.rs | 866 ++++++ gfx/wgpu/wgpu-core/src/id.rs | 196 ++ gfx/wgpu/wgpu-core/src/instance.rs | 840 ++++++ gfx/wgpu/wgpu-core/src/lib.rs | 271 ++ gfx/wgpu/wgpu-core/src/macros.rs | 226 ++ gfx/wgpu/wgpu-core/src/pipeline.rs | 254 ++ gfx/wgpu/wgpu-core/src/resource.rs | 447 +++ gfx/wgpu/wgpu-core/src/swap_chain.rs | 294 ++ gfx/wgpu/wgpu-core/src/track/buffer.rs | 241 ++ gfx/wgpu/wgpu-core/src/track/mod.rs | 593 ++++ gfx/wgpu/wgpu-core/src/track/range.rs | 399 +++ gfx/wgpu/wgpu-core/src/track/texture.rs | 466 +++ gfx/wgpu/wgpu-core/src/validation.rs | 966 ++++++ 31 files changed, 19798 insertions(+) create mode 100644 gfx/wgpu/wgpu-core/Cargo.toml create mode 100644 gfx/wgpu/wgpu-core/build.rs create mode 100644 gfx/wgpu/wgpu-core/src/binding_model.rs create mode 100644 gfx/wgpu/wgpu-core/src/command/allocator.rs create mode 100644 
gfx/wgpu/wgpu-core/src/command/bind.rs create mode 100644 gfx/wgpu/wgpu-core/src/command/bundle.rs create mode 100644 gfx/wgpu/wgpu-core/src/command/compute.rs create mode 100644 gfx/wgpu/wgpu-core/src/command/draw.rs create mode 100644 gfx/wgpu/wgpu-core/src/command/mod.rs create mode 100644 gfx/wgpu/wgpu-core/src/command/render.rs create mode 100644 gfx/wgpu/wgpu-core/src/command/transfer.rs create mode 100644 gfx/wgpu/wgpu-core/src/conv.rs create mode 100644 gfx/wgpu/wgpu-core/src/device/alloc.rs create mode 100644 gfx/wgpu/wgpu-core/src/device/descriptor.rs create mode 100644 gfx/wgpu/wgpu-core/src/device/life.rs create mode 100644 gfx/wgpu/wgpu-core/src/device/mod.rs create mode 100644 gfx/wgpu/wgpu-core/src/device/queue.rs create mode 100644 gfx/wgpu/wgpu-core/src/device/trace.rs create mode 100644 gfx/wgpu/wgpu-core/src/hub.rs create mode 100644 gfx/wgpu/wgpu-core/src/id.rs create mode 100644 gfx/wgpu/wgpu-core/src/instance.rs create mode 100644 gfx/wgpu/wgpu-core/src/lib.rs create mode 100644 gfx/wgpu/wgpu-core/src/macros.rs create mode 100644 gfx/wgpu/wgpu-core/src/pipeline.rs create mode 100644 gfx/wgpu/wgpu-core/src/resource.rs create mode 100644 gfx/wgpu/wgpu-core/src/swap_chain.rs create mode 100644 gfx/wgpu/wgpu-core/src/track/buffer.rs create mode 100644 gfx/wgpu/wgpu-core/src/track/mod.rs create mode 100644 gfx/wgpu/wgpu-core/src/track/range.rs create mode 100644 gfx/wgpu/wgpu-core/src/track/texture.rs create mode 100644 gfx/wgpu/wgpu-core/src/validation.rs (limited to 'gfx/wgpu/wgpu-core') diff --git a/gfx/wgpu/wgpu-core/Cargo.toml b/gfx/wgpu/wgpu-core/Cargo.toml new file mode 100644 index 0000000000..2868fc9343 --- /dev/null +++ b/gfx/wgpu/wgpu-core/Cargo.toml @@ -0,0 +1,68 @@ +[package] +name = "wgpu-core" +version = "0.6.0" +authors = ["wgpu developers"] +edition = "2018" +description = "WebGPU core logic on gfx-hal" +homepage = "https://github.com/gfx-rs/wgpu" +repository = "https://github.com/gfx-rs/wgpu" +keywords = ["graphics"] +license = 
"MPL-2.0" + +[lib] + +[features] +default = [] +# Enable API tracing +trace = ["ron", "serde", "wgt/trace"] +# Enable API replaying +replay = ["serde", "wgt/replay"] +# Enable serializable compute/render passes, and bundle encoders. +serial-pass = ["serde", "wgt/serde", "arrayvec/serde"] + +[dependencies] +arrayvec = "0.5" +bitflags = "1.0" +copyless = "0.1" +fxhash = "0.2" +hal = { package = "gfx-hal", git = "https://github.com/gfx-rs/gfx", rev = "1d14789011cb892f4c1a205d3f8a87d479c2e354" } +gfx-backend-empty = { git = "https://github.com/gfx-rs/gfx", rev = "1d14789011cb892f4c1a205d3f8a87d479c2e354" } +parking_lot = "0.11" +raw-window-handle = { version = "0.3", optional = true } +ron = { version = "0.6", optional = true } +serde = { version = "1.0", features = ["serde_derive"], optional = true } +smallvec = "1" +tracing = { version = "0.1", default-features = false, features = ["std"] } +thiserror = "1" +gpu-alloc = { git = "https://github.com/zakarumych/gpu-alloc", rev = "d07be73f9439a37c89f5b72f2500cbf0eb4ff613" } +gpu-descriptor = { git = "https://github.com/zakarumych/gpu-descriptor", rev = "831460c4b5120d9a74744d542f39a95b9816b5ab"} + +[dependencies.naga] +version = "0.2" +git = "https://github.com/gfx-rs/naga" +rev = "96c80738650822de35f77ab6a589f309460c8f39" +features = ["spv-in", "spv-out", "wgsl-in"] + +[dependencies.wgt] +path = "../wgpu-types" +package = "wgpu-types" +version = "0.6" + +[target.'cfg(all(unix, not(target_os = "ios"), not(target_os = "macos")))'.dependencies] +gfx-backend-vulkan = { git = "https://github.com/gfx-rs/gfx", rev = "1d14789011cb892f4c1a205d3f8a87d479c2e354" } +#gfx-backend-gl = { git = "https://github.com/gfx-rs/gfx", rev = "1d14789011cb892f4c1a205d3f8a87d479c2e354" } + +[target.'cfg(any(target_os = "ios", target_os = "macos"))'.dependencies] +gfx-backend-metal = { git = "https://github.com/gfx-rs/gfx", rev = "1d14789011cb892f4c1a205d3f8a87d479c2e354" } +gfx-backend-vulkan = { git = "https://github.com/gfx-rs/gfx", rev = 
"1d14789011cb892f4c1a205d3f8a87d479c2e354", optional = true } + +[target.'cfg(windows)'.dependencies] +gfx-backend-dx12 = { git = "https://github.com/gfx-rs/gfx", rev = "1d14789011cb892f4c1a205d3f8a87d479c2e354" } +gfx-backend-dx11 = { git = "https://github.com/gfx-rs/gfx", rev = "1d14789011cb892f4c1a205d3f8a87d479c2e354" } +gfx-backend-vulkan = { git = "https://github.com/gfx-rs/gfx", rev = "1d14789011cb892f4c1a205d3f8a87d479c2e354" } + +[dev-dependencies] +loom = "0.3" + +[build-dependencies] +cfg_aliases = "0.1" diff --git a/gfx/wgpu/wgpu-core/build.rs b/gfx/wgpu/wgpu-core/build.rs new file mode 100644 index 0000000000..382e47db90 --- /dev/null +++ b/gfx/wgpu/wgpu-core/build.rs @@ -0,0 +1,20 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +fn main() { + // Setup cfg aliases + cfg_aliases::cfg_aliases! { + // Vendors/systems + ios: { target_os = "ios" }, + macos: { target_os = "macos" }, + apple: { any(ios, macos) }, + + // Backends + vulkan: { any(windows, all(unix, not(apple)), feature = "gfx-backend-vulkan") }, + metal: { apple }, + dx12: { windows }, + dx11: { windows }, + gl: { all(not(unix), not(apple), not(windows)) }, + } +} diff --git a/gfx/wgpu/wgpu-core/src/binding_model.rs b/gfx/wgpu/wgpu-core/src/binding_model.rs new file mode 100644 index 0000000000..10126a9b97 --- /dev/null +++ b/gfx/wgpu/wgpu-core/src/binding_model.rs @@ -0,0 +1,632 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +use crate::{ + device::{ + descriptor::{DescriptorSet, DescriptorTotalCount}, + DeviceError, SHADER_STAGE_COUNT, + }, + hub::Resource, + id::{BindGroupLayoutId, BufferId, DeviceId, SamplerId, TextureViewId, Valid}, + track::{TrackerSet, DUMMY_SELECTOR}, + validation::{MissingBufferUsageError, MissingTextureUsageError}, + FastHashMap, Label, LifeGuard, MultiRefCount, Stored, MAX_BIND_GROUPS, +}; + +use arrayvec::ArrayVec; + +#[cfg(feature = "replay")] +use serde::Deserialize; +#[cfg(feature = "trace")] +use serde::Serialize; + +use std::{ + borrow::{Borrow, Cow}, + ops::Range, +}; + +use thiserror::Error; + +#[derive(Clone, Debug, Error)] +pub enum CreateBindGroupLayoutError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("arrays of bindings unsupported for this type of binding")] + ArrayUnsupported, + #[error("conflicting binding at index {0}")] + ConflictBinding(u32), + #[error("required device feature is missing: {0:?}")] + MissingFeature(wgt::Features), + #[error(transparent)] + TooManyBindings(BindingTypeMaxCountError), +} + +#[derive(Clone, Debug, Error)] +pub enum CreateBindGroupError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("bind group layout is invalid")] + InvalidLayout, + #[error("buffer {0:?} is invalid or destroyed")] + InvalidBuffer(BufferId), + #[error("texture view {0:?} is invalid")] + InvalidTextureView(TextureViewId), + #[error("sampler {0:?} is invalid")] + InvalidSampler(SamplerId), + #[error("binding count declared with {expected} items, but {actual} items were provided")] + BindingArrayLengthMismatch { actual: usize, expected: usize }, + #[error("bound buffer range {range:?} does not fit in buffer of size {size}")] + BindingRangeTooLarge { + range: Range, + size: u64, + }, + #[error("buffer binding size {actual} is less than minimum {min}")] + BindingSizeTooSmall { actual: u64, min: u64 }, + #[error("number of bindings in bind group descriptor ({actual}) does not match the number of bindings 
defined in the bind group layout ({expected})")] + BindingsNumMismatch { actual: usize, expected: usize }, + #[error("binding {0} is used at least twice in the descriptor")] + DuplicateBinding(u32), + #[error("unable to find a corresponding declaration for the given binding {0}")] + MissingBindingDeclaration(u32), + #[error(transparent)] + MissingBufferUsage(#[from] MissingBufferUsageError), + #[error(transparent)] + MissingTextureUsage(#[from] MissingTextureUsageError), + #[error("required device features not enabled: {0:?}")] + MissingFeatures(wgt::Features), + #[error("binding declared as a single item, but bind group is using it as an array")] + SingleBindingExpected, + #[error("unable to create a bind group with a swap chain image")] + SwapChainImage, + #[error("buffer offset {0} does not respect `BIND_BUFFER_ALIGNMENT`")] + UnalignedBufferOffset(wgt::BufferAddress), + #[error("uniform buffer binding range exceeds `max_uniform_buffer_binding_size` limit")] + UniformBufferRangeTooLarge, + #[error("binding {binding} has a different type ({actual:?}) than the one in the layout ({expected:?})")] + WrongBindingType { + // Index of the binding + binding: u32, + // The type given to the function + actual: wgt::BindingType, + // Human-readable description of expected types + expected: &'static str, + }, + #[error("the given sampler is/is not a comparison sampler, while the layout type indicates otherwise")] + WrongSamplerComparison, + #[error("bound texture views can not have both depth and stencil aspects enabled")] + DepthStencilAspect, +} + +#[derive(Clone, Debug, Error)] +pub enum BindingZone { + #[error("stage {0:?}")] + Stage(wgt::ShaderStage), + #[error("whole pipeline")] + Pipeline, +} + +#[derive(Clone, Debug, Error)] +#[error("too many bindings of type {kind:?} in {zone}, limit is {count}")] +pub struct BindingTypeMaxCountError { + pub kind: BindingTypeMaxCountErrorKind, + pub zone: BindingZone, + pub count: u32, +} + +#[derive(Clone, Debug)] +pub enum 
BindingTypeMaxCountErrorKind { + DynamicUniformBuffers, + DynamicStorageBuffers, + SampledTextures, + Samplers, + StorageBuffers, + StorageTextures, + UniformBuffers, +} + +#[derive(Debug, Default)] +pub(crate) struct PerStageBindingTypeCounter { + vertex: u32, + fragment: u32, + compute: u32, +} + +impl PerStageBindingTypeCounter { + pub(crate) fn add(&mut self, stage: wgt::ShaderStage, count: u32) { + if stage.contains(wgt::ShaderStage::VERTEX) { + self.vertex += count; + } + if stage.contains(wgt::ShaderStage::FRAGMENT) { + self.fragment += count; + } + if stage.contains(wgt::ShaderStage::COMPUTE) { + self.compute += count; + } + } + + pub(crate) fn max(&self) -> (BindingZone, u32) { + let max_value = self.vertex.max(self.fragment.max(self.compute)); + let mut stage = wgt::ShaderStage::NONE; + if max_value == self.vertex { + stage |= wgt::ShaderStage::VERTEX + } + if max_value == self.fragment { + stage |= wgt::ShaderStage::FRAGMENT + } + if max_value == self.compute { + stage |= wgt::ShaderStage::COMPUTE + } + (BindingZone::Stage(stage), max_value) + } + + pub(crate) fn merge(&mut self, other: &Self) { + self.vertex = self.vertex.max(other.vertex); + self.fragment = self.fragment.max(other.fragment); + self.compute = self.compute.max(other.compute); + } + + pub(crate) fn validate( + &self, + limit: u32, + kind: BindingTypeMaxCountErrorKind, + ) -> Result<(), BindingTypeMaxCountError> { + let (zone, count) = self.max(); + if limit < count { + Err(BindingTypeMaxCountError { kind, zone, count }) + } else { + Ok(()) + } + } +} + +#[derive(Debug, Default)] +pub(crate) struct BindingTypeMaxCountValidator { + dynamic_uniform_buffers: u32, + dynamic_storage_buffers: u32, + sampled_textures: PerStageBindingTypeCounter, + samplers: PerStageBindingTypeCounter, + storage_buffers: PerStageBindingTypeCounter, + storage_textures: PerStageBindingTypeCounter, + uniform_buffers: PerStageBindingTypeCounter, +} + +impl BindingTypeMaxCountValidator { + pub(crate) fn 
add_binding(&mut self, binding: &wgt::BindGroupLayoutEntry) { + let count = binding.count.map_or(1, |count| count.get()); + match binding.ty { + wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Uniform, + has_dynamic_offset, + .. + } => { + self.uniform_buffers.add(binding.visibility, count); + if has_dynamic_offset { + self.dynamic_uniform_buffers += count; + } + } + wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Storage { .. }, + has_dynamic_offset, + .. + } => { + self.storage_buffers.add(binding.visibility, count); + if has_dynamic_offset { + self.dynamic_storage_buffers += count; + } + } + wgt::BindingType::Sampler { .. } => { + self.samplers.add(binding.visibility, count); + } + wgt::BindingType::Texture { .. } => { + self.sampled_textures.add(binding.visibility, count); + } + wgt::BindingType::StorageTexture { .. } => { + self.storage_textures.add(binding.visibility, count); + } + } + } + + pub(crate) fn merge(&mut self, other: &Self) { + self.dynamic_uniform_buffers += other.dynamic_uniform_buffers; + self.dynamic_storage_buffers += other.dynamic_storage_buffers; + self.sampled_textures.merge(&other.sampled_textures); + self.samplers.merge(&other.samplers); + self.storage_buffers.merge(&other.storage_buffers); + self.storage_textures.merge(&other.storage_textures); + self.uniform_buffers.merge(&other.uniform_buffers); + } + + pub(crate) fn validate(&self, limits: &wgt::Limits) -> Result<(), BindingTypeMaxCountError> { + if limits.max_dynamic_uniform_buffers_per_pipeline_layout < self.dynamic_uniform_buffers { + return Err(BindingTypeMaxCountError { + kind: BindingTypeMaxCountErrorKind::DynamicUniformBuffers, + zone: BindingZone::Pipeline, + count: self.dynamic_uniform_buffers, + }); + } + if limits.max_dynamic_storage_buffers_per_pipeline_layout < self.dynamic_storage_buffers { + return Err(BindingTypeMaxCountError { + kind: BindingTypeMaxCountErrorKind::DynamicStorageBuffers, + zone: BindingZone::Pipeline, + count: 
self.dynamic_storage_buffers, + }); + } + self.sampled_textures.validate( + limits.max_sampled_textures_per_shader_stage, + BindingTypeMaxCountErrorKind::SampledTextures, + )?; + self.storage_buffers.validate( + limits.max_storage_buffers_per_shader_stage, + BindingTypeMaxCountErrorKind::StorageBuffers, + )?; + self.samplers.validate( + limits.max_samplers_per_shader_stage, + BindingTypeMaxCountErrorKind::Samplers, + )?; + self.storage_buffers.validate( + limits.max_storage_buffers_per_shader_stage, + BindingTypeMaxCountErrorKind::StorageBuffers, + )?; + self.storage_textures.validate( + limits.max_storage_textures_per_shader_stage, + BindingTypeMaxCountErrorKind::StorageTextures, + )?; + self.uniform_buffers.validate( + limits.max_uniform_buffers_per_shader_stage, + BindingTypeMaxCountErrorKind::UniformBuffers, + )?; + Ok(()) + } +} + +/// Bindable resource and the slot to bind it to. +#[derive(Clone, Debug)] +#[cfg_attr(feature = "trace", derive(Serialize))] +#[cfg_attr(feature = "replay", derive(Deserialize))] +pub struct BindGroupEntry<'a> { + /// Slot for which binding provides resource. Corresponds to an entry of the same + /// binding index in the [`BindGroupLayoutDescriptor`]. + pub binding: u32, + /// Resource to attach to the binding + pub resource: BindingResource<'a>, +} + +/// Describes a group of bindings and the resources to be bound. +#[derive(Clone, Debug)] +#[cfg_attr(feature = "trace", derive(Serialize))] +#[cfg_attr(feature = "replay", derive(Deserialize))] +pub struct BindGroupDescriptor<'a> { + /// Debug label of the bind group. This will show up in graphics debuggers for easy identification. + pub label: Label<'a>, + /// The [`BindGroupLayout`] that corresponds to this bind group. + pub layout: BindGroupLayoutId, + /// The resources to bind to this bind group. + pub entries: Cow<'a, [BindGroupEntry<'a>]>, +} + +/// Describes a [`BindGroupLayout`]. 
+#[derive(Clone, Debug)] +#[cfg_attr(feature = "trace", derive(serde::Serialize))] +#[cfg_attr(feature = "replay", derive(serde::Deserialize))] +pub struct BindGroupLayoutDescriptor<'a> { + /// Debug label of the bind group layout. This will show up in graphics debuggers for easy identification. + pub label: Label<'a>, + /// Array of entries in this BindGroupLayout + pub entries: Cow<'a, [wgt::BindGroupLayoutEntry]>, +} + +pub(crate) type BindEntryMap = FastHashMap; + +#[derive(Debug)] +pub struct BindGroupLayout { + pub(crate) raw: B::DescriptorSetLayout, + pub(crate) device_id: Stored, + pub(crate) multi_ref_count: MultiRefCount, + pub(crate) entries: BindEntryMap, + pub(crate) desc_count: DescriptorTotalCount, + pub(crate) dynamic_count: usize, + pub(crate) count_validator: BindingTypeMaxCountValidator, + #[cfg(debug_assertions)] + pub(crate) label: String, +} + +impl Resource for BindGroupLayout { + const TYPE: &'static str = "BindGroupLayout"; + + fn life_guard(&self) -> &LifeGuard { + unreachable!() + } + + fn label(&self) -> &str { + #[cfg(debug_assertions)] + return &self.label; + #[cfg(not(debug_assertions))] + return ""; + } +} + +#[derive(Clone, Debug, Error)] +pub enum CreatePipelineLayoutError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("bind group layout {0:?} is invalid")] + InvalidBindGroupLayout(BindGroupLayoutId), + #[error( + "push constant at index {index} has range bound {bound} not aligned to {}", + wgt::PUSH_CONSTANT_ALIGNMENT + )] + MisalignedPushConstantRange { index: usize, bound: u32 }, + #[error("device does not have required feature: {0:?}")] + MissingFeature(wgt::Features), + #[error("push constant range (index {index}) provides for stage(s) {provided:?} but there exists another range that provides stage(s) {intersected:?}. 
Each stage may only be provided by one range")] + MoreThanOnePushConstantRangePerStage { + index: usize, + provided: wgt::ShaderStage, + intersected: wgt::ShaderStage, + }, + #[error("push constant at index {index} has range {}..{} which exceeds device push constant size limit 0..{max}", range.start, range.end)] + PushConstantRangeTooLarge { + index: usize, + range: Range, + max: u32, + }, + #[error(transparent)] + TooManyBindings(BindingTypeMaxCountError), + #[error("bind group layout count {actual} exceeds device bind group limit {max}")] + TooManyGroups { actual: usize, max: usize }, +} + +#[derive(Clone, Debug, Error)] +pub enum PushConstantUploadError { + #[error("provided push constant with indices {offset}..{end_offset} overruns matching push constant range at index {idx}, with stage(s) {:?} and indices {:?}", range.stages, range.range)] + TooLarge { + offset: u32, + end_offset: u32, + idx: usize, + range: wgt::PushConstantRange, + }, + #[error("provided push constant is for stage(s) {actual:?}, stage with a partial match found at index {idx} with stage(s) {matched:?}, however push constants must be complete matches")] + PartialRangeMatch { + actual: wgt::ShaderStage, + idx: usize, + matched: wgt::ShaderStage, + }, + #[error("provided push constant is for stage(s) {actual:?}, but intersects a push constant range (at index {idx}) with stage(s) {missing:?}. Push constants must provide the stages for all ranges they intersect")] + MissingStages { + actual: wgt::ShaderStage, + idx: usize, + missing: wgt::ShaderStage, + }, + #[error("provided push constant is for stage(s) {actual:?}, however the pipeline layout has no push constant range for the stage(s) {unmatched:?}")] + UnmatchedStages { + actual: wgt::ShaderStage, + unmatched: wgt::ShaderStage, + }, + #[error("provided push constant offset {0} does not respect `PUSH_CONSTANT_ALIGNMENT`")] + Unaligned(u32), +} + +/// Describes a pipeline layout. 
+/// +/// A `PipelineLayoutDescriptor` can be used to create a pipeline layout. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "trace", derive(Serialize))] +#[cfg_attr(feature = "replay", derive(Deserialize))] +pub struct PipelineLayoutDescriptor<'a> { + /// Debug label of the pipeine layout. This will show up in graphics debuggers for easy identification. + pub label: Label<'a>, + /// Bind groups that this pipeline uses. The first entry will provide all the bindings for + /// "set = 0", second entry will provide all the bindings for "set = 1" etc. + pub bind_group_layouts: Cow<'a, [BindGroupLayoutId]>, + /// Set of push constant ranges this pipeline uses. Each shader stage that uses push constants + /// must define the range in push constant memory that corresponds to its single `layout(push_constant)` + /// uniform block. + /// + /// If this array is non-empty, the [`Features::PUSH_CONSTANTS`] must be enabled. + pub push_constant_ranges: Cow<'a, [wgt::PushConstantRange]>, +} + +#[derive(Debug)] +pub struct PipelineLayout { + pub(crate) raw: B::PipelineLayout, + pub(crate) device_id: Stored, + pub(crate) life_guard: LifeGuard, + pub(crate) bind_group_layout_ids: ArrayVec<[Valid; MAX_BIND_GROUPS]>, + pub(crate) push_constant_ranges: ArrayVec<[wgt::PushConstantRange; SHADER_STAGE_COUNT]>, +} + +impl PipelineLayout { + /// Validate push constants match up with expected ranges. + pub(crate) fn validate_push_constant_ranges( + &self, + stages: wgt::ShaderStage, + offset: u32, + end_offset: u32, + ) -> Result<(), PushConstantUploadError> { + // Don't need to validate size against the push constant size limit here, + // as push constant ranges are already validated to be within bounds, + // and we validate that they are within the ranges. 
+ + if offset % wgt::PUSH_CONSTANT_ALIGNMENT != 0 { + return Err(PushConstantUploadError::Unaligned(offset)); + } + + // Push constant validation looks very complicated on the surface, but + // the problem can be range-reduced pretty well. + // + // Push constants require (summarized from the vulkan spec): + // 1. For each byte in the range and for each shader stage in stageFlags, + // there must be a push constant range in the layout that includes that + // byte and that stage. + // 2. For each byte in the range and for each push constant range that overlaps that byte, + // `stage` must include all stages in that push constant range’s `stage`. + // + // However there are some additional constraints that help us: + // 3. All push constant ranges are the only range that can access that stage. + // i.e. if one range has VERTEX, no other range has VERTEX + // + // Therefore we can simplify the checks in the following ways: + // - Because 3 guarantees that the push constant range has a unique stage, + // when we check for 1, we can simply check that our entire updated range + // is within a push constant range. i.e. our range for a specific stage cannot + // intersect more than one push constant range. + let mut used_stages = wgt::ShaderStage::NONE; + for (idx, range) in self.push_constant_ranges.iter().enumerate() { + // contains not intersects due to 2 + if stages.contains(range.stages) { + if !(range.range.start <= offset && end_offset <= range.range.end) { + return Err(PushConstantUploadError::TooLarge { + offset, + end_offset, + idx, + range: range.clone(), + }); + } + used_stages |= range.stages; + } else if stages.intersects(range.stages) { + // Will be caught by used stages check below, but we can do this because of 1 + // and is more helpful to the user. 
+ return Err(PushConstantUploadError::PartialRangeMatch { + actual: stages, + idx, + matched: range.stages, + }); + } + + // The push constant range intersects range we are uploading + if offset < range.range.end && range.range.start < end_offset { + // But requires stages we don't provide + if !stages.contains(range.stages) { + return Err(PushConstantUploadError::MissingStages { + actual: stages, + idx, + missing: stages, + }); + } + } + } + if used_stages != stages { + return Err(PushConstantUploadError::UnmatchedStages { + actual: stages, + unmatched: stages - used_stages, + }); + } + Ok(()) + } +} + +impl Resource for PipelineLayout { + const TYPE: &'static str = "PipelineLayout"; + + fn life_guard(&self) -> &LifeGuard { + &self.life_guard + } +} + +#[repr(C)] +#[derive(Clone, Debug, Hash, PartialEq)] +#[cfg_attr(feature = "trace", derive(Serialize))] +#[cfg_attr(feature = "replay", derive(Deserialize))] +pub struct BufferBinding { + pub buffer_id: BufferId, + pub offset: wgt::BufferAddress, + pub size: Option, +} + +// Note: Duplicated in `wgpu-rs` as `BindingResource` +// They're different enough that it doesn't make sense to share a common type +#[derive(Debug, Clone)] +#[cfg_attr(feature = "trace", derive(serde::Serialize))] +#[cfg_attr(feature = "replay", derive(serde::Deserialize))] +pub enum BindingResource<'a> { + Buffer(BufferBinding), + Sampler(SamplerId), + TextureView(TextureViewId), + TextureViewArray(Cow<'a, [TextureViewId]>), +} + +#[derive(Clone, Debug, Error)] +pub enum BindError { + #[error("number of dynamic offsets ({actual}) doesn't match the number of dynamic bindings in the bind group layout ({expected})")] + MismatchedDynamicOffsetCount { actual: usize, expected: usize }, + #[error( + "dynamic binding at index {idx}: offset {offset} does not respect `BIND_BUFFER_ALIGNMENT`" + )] + UnalignedDynamicBinding { idx: usize, offset: u32 }, + #[error("dynamic binding at index {idx} with offset {offset} would overrun the buffer (limit: {max})")] 
+ DynamicBindingOutOfBounds { idx: usize, offset: u32, max: u64 }, +} + +#[derive(Debug)] +pub struct BindGroupDynamicBindingData { + /// The maximum value the dynamic offset can have before running off the end of the buffer. + pub(crate) maximum_dynamic_offset: wgt::BufferAddress, +} + +#[derive(Debug)] +pub struct BindGroup { + pub(crate) raw: DescriptorSet, + pub(crate) device_id: Stored, + pub(crate) layout_id: Valid, + pub(crate) life_guard: LifeGuard, + pub(crate) used: TrackerSet, + pub(crate) dynamic_binding_info: Vec, +} + +impl BindGroup { + pub(crate) fn validate_dynamic_bindings( + &self, + offsets: &[wgt::DynamicOffset], + ) -> Result<(), BindError> { + if self.dynamic_binding_info.len() != offsets.len() { + return Err(BindError::MismatchedDynamicOffsetCount { + expected: self.dynamic_binding_info.len(), + actual: offsets.len(), + }); + } + + for (idx, (info, &offset)) in self + .dynamic_binding_info + .iter() + .zip(offsets.iter()) + .enumerate() + { + if offset as wgt::BufferAddress % wgt::BIND_BUFFER_ALIGNMENT != 0 { + return Err(BindError::UnalignedDynamicBinding { idx, offset }); + } + + if offset as wgt::BufferAddress > info.maximum_dynamic_offset { + return Err(BindError::DynamicBindingOutOfBounds { + idx, + offset, + max: info.maximum_dynamic_offset, + }); + } + } + + Ok(()) + } +} + +impl Borrow<()> for BindGroup { + fn borrow(&self) -> &() { + &DUMMY_SELECTOR + } +} + +impl Resource for BindGroup { + const TYPE: &'static str = "BindGroup"; + + fn life_guard(&self) -> &LifeGuard { + &self.life_guard + } +} + +#[derive(Clone, Debug, Error)] +pub enum GetBindGroupLayoutError { + #[error("pipeline is invalid")] + InvalidPipeline, + #[error("invalid group index {0}")] + InvalidGroupIndex(u32), +} diff --git a/gfx/wgpu/wgpu-core/src/command/allocator.rs b/gfx/wgpu/wgpu-core/src/command/allocator.rs new file mode 100644 index 0000000000..cfaa6258c2 --- /dev/null +++ b/gfx/wgpu/wgpu-core/src/command/allocator.rs @@ -0,0 +1,268 @@ +/* This Source Code 
Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use super::CommandBuffer; +use crate::{ + device::DeviceError, hub::GfxBackend, id::DeviceId, track::TrackerSet, FastHashMap, + PrivateFeatures, Stored, SubmissionIndex, +}; + +#[cfg(debug_assertions)] +use crate::LabelHelpers; + +use hal::{command::CommandBuffer as _, device::Device as _, pool::CommandPool as _}; +use parking_lot::Mutex; +use thiserror::Error; + +use std::thread; + +const GROW_AMOUNT: usize = 20; + +#[derive(Debug)] +struct CommandPool { + raw: B::CommandPool, + total: usize, + available: Vec, + pending: Vec<(B::CommandBuffer, SubmissionIndex)>, +} + +impl CommandPool { + fn maintain(&mut self, last_done_index: SubmissionIndex) { + for i in (0..self.pending.len()).rev() { + if self.pending[i].1 <= last_done_index { + let (cmd_buf, index) = self.pending.swap_remove(i); + tracing::trace!( + "recycling cmdbuf submitted in {} when {} is last done", + index, + last_done_index, + ); + self.recycle(cmd_buf); + } + } + } + + fn recycle(&mut self, mut raw: B::CommandBuffer) { + unsafe { + raw.reset(false); + } + self.available.push(raw); + } + + fn allocate(&mut self) -> B::CommandBuffer { + if self.available.is_empty() { + self.total += GROW_AMOUNT; + unsafe { + self.raw.allocate( + GROW_AMOUNT, + hal::command::Level::Primary, + &mut self.available, + ) + }; + } + self.available.pop().unwrap() + } +} + +#[derive(Debug)] +struct Inner { + pools: FastHashMap>, +} + +#[derive(Debug)] +pub struct CommandAllocator { + queue_family: hal::queue::QueueFamilyId, + internal_thread_id: thread::ThreadId, + inner: Mutex>, +} + +impl CommandAllocator { + pub(crate) fn allocate( + &self, + device_id: Stored, + device: &B::Device, + limits: wgt::Limits, + private_features: PrivateFeatures, + label: &crate::Label, + #[cfg(feature = "trace")] enable_tracing: bool, + ) -> Result, 
CommandAllocatorError> { + //debug_assert_eq!(device_id.backend(), B::VARIANT); + let _ = label; // silence warning on release + let thread_id = thread::current().id(); + let mut inner = self.inner.lock(); + + use std::collections::hash_map::Entry; + let pool = match inner.pools.entry(thread_id) { + Entry::Occupied(e) => e.into_mut(), + Entry::Vacant(e) => { + tracing::info!("Starting on thread {:?}", thread_id); + let raw = unsafe { + device + .create_command_pool( + self.queue_family, + hal::pool::CommandPoolCreateFlags::RESET_INDIVIDUAL, + ) + .or(Err(DeviceError::OutOfMemory))? + }; + let pool = CommandPool { + raw, + total: 0, + available: Vec::new(), + pending: Vec::new(), + }; + e.insert(pool) + } + }; + + let init = pool.allocate(); + + Ok(CommandBuffer { + raw: vec![init], + is_recording: true, + recorded_thread_id: thread_id, + device_id, + trackers: TrackerSet::new(B::VARIANT), + used_swap_chain: None, + limits, + private_features, + #[cfg(feature = "trace")] + commands: if enable_tracing { + Some(Vec::new()) + } else { + None + }, + #[cfg(debug_assertions)] + label: label.to_string_or_default(), + }) + } +} + +impl CommandAllocator { + pub fn new( + queue_family: hal::queue::QueueFamilyId, + device: &B::Device, + ) -> Result { + let internal_thread_id = thread::current().id(); + tracing::info!("Starting on (internal) thread {:?}", internal_thread_id); + let mut pools = FastHashMap::default(); + pools.insert( + internal_thread_id, + CommandPool { + raw: unsafe { + device + .create_command_pool( + queue_family, + hal::pool::CommandPoolCreateFlags::RESET_INDIVIDUAL, + ) + .or(Err(DeviceError::OutOfMemory))? 
+ }, + total: 0, + available: Vec::new(), + pending: Vec::new(), + }, + ); + Ok(Self { + queue_family, + internal_thread_id, + inner: Mutex::new(Inner { pools }), + }) + } + + fn allocate_for_thread_id(&self, thread_id: thread::ThreadId) -> B::CommandBuffer { + let mut inner = self.inner.lock(); + inner.pools.get_mut(&thread_id).unwrap().allocate() + } + + pub fn allocate_internal(&self) -> B::CommandBuffer { + self.allocate_for_thread_id(self.internal_thread_id) + } + + pub fn extend(&self, cmd_buf: &CommandBuffer) -> B::CommandBuffer { + self.allocate_for_thread_id(cmd_buf.recorded_thread_id) + } + + pub fn discard_internal(&self, raw: B::CommandBuffer) { + let mut inner = self.inner.lock(); + inner + .pools + .get_mut(&self.internal_thread_id) + .unwrap() + .recycle(raw); + } + + pub fn discard(&self, mut cmd_buf: CommandBuffer) { + cmd_buf.trackers.clear(); + let mut inner = self.inner.lock(); + let pool = inner.pools.get_mut(&cmd_buf.recorded_thread_id).unwrap(); + for raw in cmd_buf.raw { + pool.recycle(raw); + } + } + + pub fn after_submit_internal(&self, raw: B::CommandBuffer, submit_index: SubmissionIndex) { + let mut inner = self.inner.lock(); + inner + .pools + .get_mut(&self.internal_thread_id) + .unwrap() + .pending + .push((raw, submit_index)); + } + + pub fn after_submit(&self, cmd_buf: CommandBuffer, submit_index: SubmissionIndex) { + // Record this command buffer as pending + let mut inner = self.inner.lock(); + inner + .pools + .get_mut(&cmd_buf.recorded_thread_id) + .unwrap() + .pending + .extend(cmd_buf.raw.into_iter().map(|raw| (raw, submit_index))); + } + + pub fn maintain(&self, device: &B::Device, last_done_index: SubmissionIndex) { + let mut inner = self.inner.lock(); + let mut remove_threads = Vec::new(); + for (&thread_id, pool) in inner.pools.iter_mut() { + pool.maintain(last_done_index); + if pool.total == pool.available.len() && thread_id != self.internal_thread_id { + assert!(pool.pending.is_empty()); + remove_threads.push(thread_id); 
+ } + } + for thread_id in remove_threads { + tracing::info!("Removing from thread {:?}", thread_id); + let mut pool = inner.pools.remove(&thread_id).unwrap(); + unsafe { + pool.raw.free(pool.available); + device.destroy_command_pool(pool.raw); + } + } + } + + pub fn destroy(self, device: &B::Device) { + let mut inner = self.inner.lock(); + for (_, mut pool) in inner.pools.drain() { + while let Some((raw, _)) = pool.pending.pop() { + pool.recycle(raw); + } + if pool.total != pool.available.len() { + tracing::error!( + "Some command buffers are still recorded, only tracking {} / {}", + pool.available.len(), + pool.total + ); + } + unsafe { + pool.raw.free(pool.available); + device.destroy_command_pool(pool.raw); + } + } + } +} + +#[derive(Clone, Debug, Error)] +pub enum CommandAllocatorError { + #[error(transparent)] + Device(#[from] DeviceError), +} diff --git a/gfx/wgpu/wgpu-core/src/command/bind.rs b/gfx/wgpu/wgpu-core/src/command/bind.rs new file mode 100644 index 0000000000..a62b38d5b7 --- /dev/null +++ b/gfx/wgpu/wgpu-core/src/command/bind.rs @@ -0,0 +1,295 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +use crate::{ + binding_model::{BindGroup, PipelineLayout}, + device::SHADER_STAGE_COUNT, + hub::{GfxBackend, Storage}, + id::{BindGroupId, BindGroupLayoutId, PipelineLayoutId, Valid}, + Stored, MAX_BIND_GROUPS, +}; + +use arrayvec::ArrayVec; +use std::slice; +use wgt::DynamicOffset; + +type BindGroupMask = u8; + +#[derive(Clone, Debug)] +pub(super) struct BindGroupPair { + layout_id: Valid, + group_id: Stored, +} + +#[derive(Debug)] +pub(super) enum LayoutChange<'a> { + Unchanged, + Match(Valid, &'a [DynamicOffset]), + Mismatch, +} + +#[derive(Debug)] +pub enum Provision { + Unchanged, + Changed { was_compatible: bool }, +} + +#[derive(Clone)] +pub(super) struct FollowUpIter<'a> { + iter: slice::Iter<'a, BindGroupEntry>, +} +impl<'a> Iterator for FollowUpIter<'a> { + type Item = (Valid, &'a [DynamicOffset]); + fn next(&mut self) -> Option { + self.iter + .next() + .and_then(|entry| Some((entry.actual_value()?, entry.dynamic_offsets.as_slice()))) + } +} + +#[derive(Clone, Default, Debug)] +pub(super) struct BindGroupEntry { + expected_layout_id: Option>, + provided: Option, + dynamic_offsets: Vec, +} + +impl BindGroupEntry { + fn provide( + &mut self, + bind_group_id: Valid, + bind_group: &BindGroup, + offsets: &[DynamicOffset], + ) -> Provision { + debug_assert_eq!(B::VARIANT, bind_group_id.0.backend()); + + let was_compatible = match self.provided { + Some(BindGroupPair { + layout_id, + ref group_id, + }) => { + if group_id.value == bind_group_id && offsets == self.dynamic_offsets.as_slice() { + assert_eq!(layout_id, bind_group.layout_id); + return Provision::Unchanged; + } + self.expected_layout_id == Some(layout_id) + } + None => false, + }; + + self.provided = Some(BindGroupPair { + layout_id: bind_group.layout_id, + group_id: Stored { + value: bind_group_id, + ref_count: bind_group.life_guard.add_ref(), + }, + }); + self.dynamic_offsets.clear(); + self.dynamic_offsets.extend_from_slice(offsets); + + Provision::Changed { was_compatible } + } + + pub fn 
expect_layout( + &mut self, + bind_group_layout_id: Valid, + ) -> LayoutChange { + let some = Some(bind_group_layout_id); + if self.expected_layout_id != some { + self.expected_layout_id = some; + match self.provided { + Some(BindGroupPair { + layout_id, + ref group_id, + }) if layout_id == bind_group_layout_id => { + LayoutChange::Match(group_id.value, &self.dynamic_offsets) + } + Some(_) | None => LayoutChange::Mismatch, + } + } else { + LayoutChange::Unchanged + } + } + + fn is_valid(&self) -> Option { + match (self.expected_layout_id, self.provided.as_ref()) { + (None, None) => Some(true), + (None, Some(_)) => None, + (Some(_), None) => Some(false), + (Some(layout), Some(pair)) => Some(layout == pair.layout_id), + } + } + + fn actual_value(&self) -> Option> { + self.expected_layout_id.and_then(|layout_id| { + self.provided.as_ref().and_then(|pair| { + if pair.layout_id == layout_id { + Some(pair.group_id.value) + } else { + None + } + }) + }) + } +} + +#[derive(Debug)] +pub struct Binder { + pub(super) pipeline_layout_id: Option>, //TODO: strongly `Stored` + pub(super) entries: ArrayVec<[BindGroupEntry; MAX_BIND_GROUPS]>, +} + +impl Binder { + pub(super) fn new(max_bind_groups: u32) -> Self { + Self { + pipeline_layout_id: None, + entries: (0..max_bind_groups) + .map(|_| BindGroupEntry::default()) + .collect(), + } + } + + pub(super) fn reset(&mut self) { + self.pipeline_layout_id = None; + self.entries.clear(); + } + + pub(super) fn change_pipeline_layout( + &mut self, + guard: &Storage, PipelineLayoutId>, + new_id: Valid, + ) { + let old_id_opt = self.pipeline_layout_id.replace(new_id); + let new = &guard[new_id]; + + let length = if let Some(old_id) = old_id_opt { + let old = &guard[old_id]; + if old.push_constant_ranges == new.push_constant_ranges { + new.bind_group_layout_ids.len() + } else { + 0 + } + } else { + 0 + }; + + for entry in self.entries[length..].iter_mut() { + entry.expected_layout_id = None; + } + } + + /// Attempt to set the value of the 
specified bind group index. + /// Returns Some() when the new bind group is ready to be actually bound + /// (i.e. compatible with current expectations). Also returns an iterator + /// of bind group IDs to be bound with it: those are compatible bind groups + /// that were previously blocked because the current one was incompatible. + pub(super) fn provide_entry<'a, B: GfxBackend>( + &'a mut self, + index: usize, + bind_group_id: Valid, + bind_group: &BindGroup, + offsets: &[DynamicOffset], + ) -> Option<(Valid, FollowUpIter<'a>)> { + tracing::trace!("\tBinding [{}] = group {:?}", index, bind_group_id); + debug_assert_eq!(B::VARIANT, bind_group_id.0.backend()); + + match self.entries[index].provide(bind_group_id, bind_group, offsets) { + Provision::Unchanged => None, + Provision::Changed { was_compatible, .. } => { + let compatible_count = self.compatible_count(); + if index < compatible_count { + let end = compatible_count.min(if was_compatible { + index + 1 + } else { + self.entries.len() + }); + tracing::trace!("\t\tbinding up to {}", end); + Some(( + self.pipeline_layout_id?, + FollowUpIter { + iter: self.entries[index + 1..end].iter(), + }, + )) + } else { + tracing::trace!("\t\tskipping above compatible {}", compatible_count); + None + } + } + } + } + + pub(super) fn list_active(&self) -> impl Iterator> + '_ { + self.entries.iter().filter_map(|e| match e.provided { + Some(ref pair) if e.expected_layout_id.is_some() => Some(pair.group_id.value), + _ => None, + }) + } + + pub(super) fn invalid_mask(&self) -> BindGroupMask { + self.entries.iter().enumerate().fold(0, |mask, (i, entry)| { + if entry.is_valid().unwrap_or(true) { + mask + } else { + mask | 1u8 << i + } + }) + } + + fn compatible_count(&self) -> usize { + self.entries + .iter() + .position(|entry| !entry.is_valid().unwrap_or(false)) + .unwrap_or_else(|| self.entries.len()) + } +} + +struct PushConstantChange { + stages: wgt::ShaderStage, + offset: u32, + enable: bool, +} + +/// Break up possibly 
overlapping push constant ranges into a set of non-overlapping ranges +/// which contain all the stage flags of the original ranges. This allows us to zero out (or write any value) +/// to every possible value. +pub fn compute_nonoverlapping_ranges( + ranges: &[wgt::PushConstantRange], +) -> ArrayVec<[wgt::PushConstantRange; SHADER_STAGE_COUNT * 2]> { + if ranges.is_empty() { + return ArrayVec::new(); + } + debug_assert!(ranges.len() <= SHADER_STAGE_COUNT); + + let mut breaks: ArrayVec<[PushConstantChange; SHADER_STAGE_COUNT * 2]> = ArrayVec::new(); + for range in ranges { + breaks.push(PushConstantChange { + stages: range.stages, + offset: range.range.start, + enable: true, + }); + breaks.push(PushConstantChange { + stages: range.stages, + offset: range.range.end, + enable: false, + }); + } + breaks.sort_unstable_by_key(|change| change.offset); + + let mut output_ranges = ArrayVec::new(); + let mut position = 0_u32; + let mut stages = wgt::ShaderStage::NONE; + + for bk in breaks { + if bk.offset - position > 0 && !stages.is_empty() { + output_ranges.push(wgt::PushConstantRange { + stages, + range: position..bk.offset, + }) + } + position = bk.offset; + stages.set(bk.stages, bk.enable); + } + + output_ranges +} diff --git a/gfx/wgpu/wgpu-core/src/command/bundle.rs b/gfx/wgpu/wgpu-core/src/command/bundle.rs new file mode 100644 index 0000000000..19c11c7136 --- /dev/null +++ b/gfx/wgpu/wgpu-core/src/command/bundle.rs @@ -0,0 +1,1230 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/*! Render Bundles + + ## Software implementation + + The path from nothing to using a render bundle consists of 3 phases. 
+ + ### Initial command encoding + + The user creates a `RenderBundleEncoder` and populates it by issuing commands + from the `bundle_ffi` module, just like with `RenderPass`, except that the + set of available commands is reduced. Everything is written into a `BasePass`. + + ### Bundle baking + + Once the commands are encoded, the user calls `render_bundle_encoder_finish`. + This is perhaps the most complex part of the logic. It consumes the + commands stored in the `BasePass`, while validating everything, tracking the state, + and re-recording the commands into a separate `Vec<RenderCommand>`. It + doesn't actually execute any commands. + + More importantly, the produced vector of commands is "normalized", + which means it can be executed verbatim without any state tracking. More + formally, a "normalized" command stream guarantees that any state required by + a draw call is set explicitly by one of the commands between the draw call + and the last change of the pipeline. + + ### Execution + + When the bundle is used in an actual render pass, `RenderBundle::execute` is + called. It goes through the commands and issues them into the native command + buffer. Thanks to the "normalized" property, it doesn't track any bind group + invalidations or index format changes. +!*/ +#![allow(clippy::reversed_empty_ranges)] + +use crate::{ + command::{ + BasePass, DrawError, MapPassErr, PassErrorScope, RenderCommand, RenderCommandError, + StateChange, + }, + conv, + device::{ + AttachmentData, Device, DeviceError, RenderPassContext, MAX_VERTEX_BUFFERS, + SHADER_STAGE_COUNT, + }, + hub::{GfxBackend, GlobalIdentityHandlerFactory, Hub, Resource, Storage, Token}, + id, + resource::BufferUse, + span, + track::{TrackerSet, UsageConflict}, + validation::check_buffer_usage, + Label, LabelHelpers, LifeGuard, Stored, MAX_BIND_GROUPS, +}; +use arrayvec::ArrayVec; +use std::{borrow::Cow, iter, ops::Range}; +use thiserror::Error; + +/// Describes a [`RenderBundleEncoder`].
+#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "trace", derive(serde::Serialize))] +#[cfg_attr(feature = "replay", derive(serde::Deserialize))] +pub struct RenderBundleEncoderDescriptor<'a> { + /// Debug label of the render bundle encoder. This will show up in graphics debuggers for easy identification. + pub label: Label<'a>, + /// The formats of the color attachments that this render bundle is capable of rendering to. This + /// must match the formats of the color attachments in the renderpass this render bundle is executed in. + pub color_formats: Cow<'a, [wgt::TextureFormat]>, + /// The format of the depth attachment that this render bundle is capable of rendering to. This + /// must match the format of the depth attachment in the renderpass this render bundle is executed in. + pub depth_stencil_format: Option, + /// Sample count this render bundle is capable of rendering to. This must match the pipelines and + /// the renderpasses it is used in. + pub sample_count: u32, +} + +#[derive(Debug)] +#[cfg_attr(feature = "serial-pass", derive(serde::Deserialize, serde::Serialize))] +pub struct RenderBundleEncoder { + base: BasePass, + parent_id: id::DeviceId, + pub(crate) context: RenderPassContext, +} + +impl RenderBundleEncoder { + pub fn new( + desc: &RenderBundleEncoderDescriptor, + parent_id: id::DeviceId, + base: Option>, + ) -> Result { + span!(_guard, INFO, "RenderBundleEncoder::new"); + Ok(Self { + base: base.unwrap_or_else(BasePass::new), + parent_id, + context: RenderPassContext { + attachments: AttachmentData { + colors: desc.color_formats.iter().cloned().collect(), + resolves: ArrayVec::new(), + depth_stencil: desc.depth_stencil_format, + }, + sample_count: { + let sc = desc.sample_count; + if sc == 0 || sc > 32 || !conv::is_power_of_two(sc) { + return Err(CreateRenderBundleError::InvalidSampleCount(sc)); + } + sc as u8 + }, + }, + }) + } + + pub fn dummy(parent_id: id::DeviceId) -> Self { + Self { + base: 
BasePass::new(), + parent_id, + context: RenderPassContext { + attachments: AttachmentData { + colors: ArrayVec::new(), + resolves: ArrayVec::new(), + depth_stencil: None, + }, + sample_count: 0, + }, + } + } + + pub fn parent(&self) -> id::DeviceId { + self.parent_id + } + + pub(crate) fn finish( + self, + desc: &RenderBundleDescriptor, + device: &Device, + hub: &Hub, + token: &mut Token>, + ) -> Result { + let (pipeline_layout_guard, mut token) = hub.pipeline_layouts.read(token); + let (bind_group_guard, mut token) = hub.bind_groups.read(&mut token); + let (pipeline_guard, mut token) = hub.render_pipelines.read(&mut token); + let (buffer_guard, _) = hub.buffers.read(&mut token); + + let mut state = State { + trackers: TrackerSet::new(self.parent_id.backend()), + index: IndexState::new(), + vertex: (0..MAX_VERTEX_BUFFERS) + .map(|_| VertexState::new()) + .collect(), + bind: (0..MAX_BIND_GROUPS).map(|_| BindState::new()).collect(), + push_constant_ranges: PushConstantState::new(), + raw_dynamic_offsets: Vec::new(), + flat_dynamic_offsets: Vec::new(), + used_bind_groups: 0, + pipeline: StateChange::new(), + }; + let mut commands = Vec::new(); + let mut base = self.base.as_ref(); + let mut pipeline_layout_id = None::>; + + for &command in base.commands { + match command { + RenderCommand::SetBindGroup { + index, + num_dynamic_offsets, + bind_group_id, + } => { + let scope = PassErrorScope::SetBindGroup(bind_group_id); + + let max_bind_groups = device.limits.max_bind_groups; + if (index as u32) >= max_bind_groups { + return Err(RenderCommandError::BindGroupIndexOutOfRange { + index, + max: max_bind_groups, + }) + .map_pass_err(scope); + } + + let offsets = &base.dynamic_offsets[..num_dynamic_offsets as usize]; + base.dynamic_offsets = &base.dynamic_offsets[num_dynamic_offsets as usize..]; + // Check for misaligned offsets. 
+ if let Some(offset) = offsets + .iter() + .map(|offset| *offset as wgt::BufferAddress) + .find(|offset| offset % wgt::BIND_BUFFER_ALIGNMENT != 0) + { + return Err(RenderCommandError::UnalignedBufferOffset(offset)) + .map_pass_err(scope); + } + + let bind_group = state + .trackers + .bind_groups + .use_extend(&*bind_group_guard, bind_group_id, (), ()) + .map_err(|_| RenderCommandError::InvalidBindGroup(bind_group_id)) + .map_pass_err(scope)?; + if bind_group.dynamic_binding_info.len() != offsets.len() { + return Err(RenderCommandError::InvalidDynamicOffsetCount { + actual: offsets.len(), + expected: bind_group.dynamic_binding_info.len(), + }) + .map_pass_err(scope); + } + + state.set_bind_group(index, bind_group_id, bind_group.layout_id, offsets); + state + .trackers + .merge_extend(&bind_group.used) + .map_pass_err(scope)?; + } + RenderCommand::SetPipeline(pipeline_id) => { + let scope = PassErrorScope::SetPipelineRender(pipeline_id); + if state.pipeline.set_and_check_redundant(pipeline_id) { + continue; + } + + let pipeline = state + .trackers + .render_pipes + .use_extend(&*pipeline_guard, pipeline_id, (), ()) + .unwrap(); + + self.context + .check_compatible(&pipeline.pass_context) + .map_err(RenderCommandError::IncompatiblePipeline) + .map_pass_err(scope)?; + + //TODO: check read-only depth + + let layout = &pipeline_layout_guard[pipeline.layout_id.value]; + pipeline_layout_id = Some(pipeline.layout_id.value); + + state.set_pipeline( + pipeline.index_format, + &pipeline.vertex_strides, + &layout.bind_group_layout_ids, + &layout.push_constant_ranges, + ); + commands.push(command); + if let Some(iter) = state.flush_push_constants() { + commands.extend(iter) + } + } + RenderCommand::SetIndexBuffer { + buffer_id, + offset, + size, + } => { + let scope = PassErrorScope::SetIndexBuffer(buffer_id); + let buffer = state + .trackers + .buffers + .use_extend(&*buffer_guard, buffer_id, (), BufferUse::INDEX) + .unwrap(); + check_buffer_usage(buffer.usage, 
wgt::BufferUsage::INDEX) + .map_pass_err(scope)?; + + let end = match size { + Some(s) => offset + s.get(), + None => buffer.size, + }; + state.index.set_buffer(buffer_id, offset..end); + } + RenderCommand::SetVertexBuffer { + slot, + buffer_id, + offset, + size, + } => { + let scope = PassErrorScope::SetVertexBuffer(buffer_id); + let buffer = state + .trackers + .buffers + .use_extend(&*buffer_guard, buffer_id, (), BufferUse::VERTEX) + .unwrap(); + check_buffer_usage(buffer.usage, wgt::BufferUsage::VERTEX) + .map_pass_err(scope)?; + + let end = match size { + Some(s) => offset + s.get(), + None => buffer.size, + }; + state.vertex[slot as usize].set_buffer(buffer_id, offset..end); + } + RenderCommand::SetPushConstant { + stages, + offset, + size_bytes, + values_offset: _, + } => { + let scope = PassErrorScope::SetPushConstant; + let end_offset = offset + size_bytes; + + let pipeline_layout_id = pipeline_layout_id + .ok_or(DrawError::MissingPipeline) + .map_pass_err(scope)?; + let pipeline_layout = &pipeline_layout_guard[pipeline_layout_id]; + + pipeline_layout + .validate_push_constant_ranges(stages, offset, end_offset) + .map_pass_err(scope)?; + + commands.push(command); + } + RenderCommand::Draw { + vertex_count, + instance_count, + first_vertex, + first_instance, + } => { + let scope = PassErrorScope::Draw; + let (vertex_limit, instance_limit) = state.vertex_limits(); + let last_vertex = first_vertex + vertex_count; + if last_vertex > vertex_limit { + return Err(DrawError::VertexBeyondLimit { + last_vertex, + vertex_limit, + }) + .map_pass_err(scope); + } + let last_instance = first_instance + instance_count; + if last_instance > instance_limit { + return Err(DrawError::InstanceBeyondLimit { + last_instance, + instance_limit, + }) + .map_pass_err(scope); + } + commands.extend(state.flush_vertices()); + commands.extend(state.flush_binds()); + commands.push(command); + } + RenderCommand::DrawIndexed { + index_count, + instance_count, + first_index, + base_vertex: 
_, + first_instance, + } => { + let scope = PassErrorScope::DrawIndexed; + //TODO: validate that base_vertex + max_index() is within the provided range + let (_, instance_limit) = state.vertex_limits(); + let index_limit = state.index.limit(); + let last_index = first_index + index_count; + if last_index > index_limit { + return Err(DrawError::IndexBeyondLimit { + last_index, + index_limit, + }) + .map_pass_err(scope); + } + let last_instance = first_instance + instance_count; + if last_instance > instance_limit { + return Err(DrawError::InstanceBeyondLimit { + last_instance, + instance_limit, + }) + .map_pass_err(scope); + } + commands.extend(state.index.flush()); + commands.extend(state.flush_vertices()); + commands.extend(state.flush_binds()); + commands.push(command); + } + RenderCommand::MultiDrawIndirect { + buffer_id, + offset: _, + count: None, + indexed: false, + } => { + let scope = PassErrorScope::DrawIndirect; + let buffer = state + .trackers + .buffers + .use_extend(&*buffer_guard, buffer_id, (), BufferUse::INDIRECT) + .unwrap(); + check_buffer_usage(buffer.usage, wgt::BufferUsage::INDIRECT) + .map_pass_err(scope)?; + + commands.extend(state.flush_vertices()); + commands.extend(state.flush_binds()); + commands.push(command); + } + RenderCommand::MultiDrawIndirect { + buffer_id, + offset: _, + count: None, + indexed: true, + } => { + let scope = PassErrorScope::DrawIndexedIndirect; + let buffer = state + .trackers + .buffers + .use_extend(&*buffer_guard, buffer_id, (), BufferUse::INDIRECT) + .map_err(|err| RenderCommandError::Buffer(buffer_id, err)) + .map_pass_err(scope)?; + check_buffer_usage(buffer.usage, wgt::BufferUsage::INDIRECT) + .map_pass_err(scope)?; + + commands.extend(state.index.flush()); + commands.extend(state.flush_vertices()); + commands.extend(state.flush_binds()); + commands.push(command); + } + RenderCommand::MultiDrawIndirect { .. } + | RenderCommand::MultiDrawIndirectCount { .. 
} => unimplemented!(), + RenderCommand::PushDebugGroup { color: _, len: _ } => unimplemented!(), + RenderCommand::InsertDebugMarker { color: _, len: _ } => unimplemented!(), + RenderCommand::PopDebugGroup => unimplemented!(), + RenderCommand::ExecuteBundle(_) + | RenderCommand::SetBlendColor(_) + | RenderCommand::SetStencilReference(_) + | RenderCommand::SetViewport { .. } + | RenderCommand::SetScissor(_) => unreachable!("not supported by a render bundle"), + } + } + + let _ = desc.label; //TODO: actually use + Ok(RenderBundle { + base: BasePass { + commands, + dynamic_offsets: state.flat_dynamic_offsets, + string_data: Vec::new(), + push_constant_data: Vec::new(), + }, + device_id: Stored { + value: id::Valid(self.parent_id), + ref_count: device.life_guard.add_ref(), + }, + used: state.trackers, + context: self.context, + life_guard: LifeGuard::new(desc.label.borrow_or_default()), + }) + } +} + +/// Error type returned from `RenderBundleEncoder::new` if the sample count is invalid. +#[derive(Clone, Debug, Error)] +pub enum CreateRenderBundleError { + #[error("invalid number of samples {0}")] + InvalidSampleCount(u32), +} + +/// Error type returned from `RenderBundle::execute` if a buffer used by the bundle has been destroyed. +#[derive(Clone, Debug, Error)] +pub enum ExecutionError { + #[error("buffer {0:?} is destroyed")] + DestroyedBuffer(id::BufferId), +} + +pub type RenderBundleDescriptor<'a> = wgt::RenderBundleDescriptor>; + +//Note: here, `RenderBundle` is just wrapping a raw stream of render commands. +// The plan is to back it by an actual Vulkan secondary buffer, D3D12 Bundle, +// or Metal indirect command buffer. +#[derive(Debug)] +pub struct RenderBundle { + // Normalized command stream. It can be executed verbatim, + // without re-binding anything on the pipeline change. 
+ base: BasePass, + pub(crate) device_id: Stored, + pub(crate) used: TrackerSet, + pub(crate) context: RenderPassContext, + pub(crate) life_guard: LifeGuard, +} + +unsafe impl Send for RenderBundle {} +unsafe impl Sync for RenderBundle {} + +impl RenderBundle { + #[cfg(feature = "trace")] + pub(crate) fn to_base_pass(&self) -> BasePass { + BasePass::from_ref(self.base.as_ref()) + } + + /// Actually encode the contents into a native command buffer. + /// + /// This is partially duplicating the logic of `command_encoder_run_render_pass`. + /// However the point of this function is to be lighter, since we already had + /// a chance to go through the commands in `render_bundle_encoder_finish`. + /// + /// Note that the function isn't expected to fail, generally. + /// All the validation has already been done by this point. + /// The only failure condition is if some of the used buffers are destroyed. + pub(crate) unsafe fn execute( + &self, + cmd_buf: &mut B::CommandBuffer, + pipeline_layout_guard: &Storage< + crate::binding_model::PipelineLayout, + id::PipelineLayoutId, + >, + bind_group_guard: &Storage, id::BindGroupId>, + pipeline_guard: &Storage, id::RenderPipelineId>, + buffer_guard: &Storage, id::BufferId>, + ) -> Result<(), ExecutionError> { + use hal::command::CommandBuffer as _; + + let mut offsets = self.base.dynamic_offsets.as_slice(); + let mut index_type = hal::IndexType::U16; + let mut pipeline_layout_id = None::>; + + for command in self.base.commands.iter() { + match *command { + RenderCommand::SetBindGroup { + index, + num_dynamic_offsets, + bind_group_id, + } => { + let bind_group = bind_group_guard.get(bind_group_id).unwrap(); + cmd_buf.bind_graphics_descriptor_sets( + &pipeline_layout_guard[pipeline_layout_id.unwrap()].raw, + index as usize, + iter::once(bind_group.raw.raw()), + &offsets[..num_dynamic_offsets as usize], + ); + offsets = &offsets[num_dynamic_offsets as usize..]; + } + RenderCommand::SetPipeline(pipeline_id) => { + let pipeline = 
pipeline_guard.get(pipeline_id).unwrap(); + cmd_buf.bind_graphics_pipeline(&pipeline.raw); + index_type = conv::map_index_format(pipeline.index_format); + pipeline_layout_id = Some(pipeline.layout_id.value); + } + RenderCommand::SetIndexBuffer { + buffer_id, + offset, + size, + } => { + let &(ref buffer, _) = buffer_guard + .get(buffer_id) + .unwrap() + .raw + .as_ref() + .ok_or(ExecutionError::DestroyedBuffer(buffer_id))?; + let range = hal::buffer::SubRange { + offset, + size: size.map(|s| s.get()), + }; + cmd_buf.bind_index_buffer(buffer, range, index_type); + } + RenderCommand::SetVertexBuffer { + slot, + buffer_id, + offset, + size, + } => { + let &(ref buffer, _) = buffer_guard + .get(buffer_id) + .unwrap() + .raw + .as_ref() + .ok_or(ExecutionError::DestroyedBuffer(buffer_id))?; + let range = hal::buffer::SubRange { + offset, + size: size.map(|s| s.get()), + }; + cmd_buf.bind_vertex_buffers(slot, iter::once((buffer, range))); + } + RenderCommand::SetPushConstant { + stages, + offset, + size_bytes, + values_offset, + } => { + let pipeline_layout_id = pipeline_layout_id.unwrap(); + let pipeline_layout = &pipeline_layout_guard[pipeline_layout_id]; + + if let Some(values_offset) = values_offset { + let values_end_offset = + (values_offset + size_bytes / wgt::PUSH_CONSTANT_ALIGNMENT) as usize; + let data_slice = &self.base.push_constant_data + [(values_offset as usize)..values_end_offset]; + + cmd_buf.push_graphics_constants( + &pipeline_layout.raw, + conv::map_shader_stage_flags(stages), + offset, + &data_slice, + ) + } else { + super::push_constant_clear( + offset, + size_bytes, + |clear_offset, clear_data| { + cmd_buf.push_graphics_constants( + &pipeline_layout.raw, + conv::map_shader_stage_flags(stages), + clear_offset, + clear_data, + ); + }, + ); + } + } + RenderCommand::Draw { + vertex_count, + instance_count, + first_vertex, + first_instance, + } => { + cmd_buf.draw( + first_vertex..first_vertex + vertex_count, + first_instance..first_instance + 
instance_count, + ); + } + RenderCommand::DrawIndexed { + index_count, + instance_count, + first_index, + base_vertex, + first_instance, + } => { + cmd_buf.draw_indexed( + first_index..first_index + index_count, + base_vertex, + first_instance..first_instance + instance_count, + ); + } + RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count: None, + indexed: false, + } => { + let &(ref buffer, _) = buffer_guard + .get(buffer_id) + .unwrap() + .raw + .as_ref() + .ok_or(ExecutionError::DestroyedBuffer(buffer_id))?; + cmd_buf.draw_indirect(buffer, offset, 1, 0); + } + RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count: None, + indexed: true, + } => { + let &(ref buffer, _) = buffer_guard + .get(buffer_id) + .unwrap() + .raw + .as_ref() + .ok_or(ExecutionError::DestroyedBuffer(buffer_id))?; + cmd_buf.draw_indexed_indirect(buffer, offset, 1, 0); + } + RenderCommand::MultiDrawIndirect { .. } + | RenderCommand::MultiDrawIndirectCount { .. } => unimplemented!(), + RenderCommand::PushDebugGroup { color: _, len: _ } => unimplemented!(), + RenderCommand::InsertDebugMarker { color: _, len: _ } => unimplemented!(), + RenderCommand::PopDebugGroup => unimplemented!(), + RenderCommand::ExecuteBundle(_) + | RenderCommand::SetBlendColor(_) + | RenderCommand::SetStencilReference(_) + | RenderCommand::SetViewport { .. 
} + | RenderCommand::SetScissor(_) => unreachable!(), + } + } + + Ok(()) + } +} + +impl Resource for RenderBundle { + const TYPE: &'static str = "RenderBundle"; + + fn life_guard(&self) -> &LifeGuard { + &self.life_guard + } +} + +#[derive(Debug)] +struct IndexState { + buffer: Option, + format: wgt::IndexFormat, + range: Range, + is_dirty: bool, +} + +impl IndexState { + fn new() -> Self { + Self { + buffer: None, + format: wgt::IndexFormat::default(), + range: 0..0, + is_dirty: false, + } + } + + fn limit(&self) -> u32 { + assert!(self.buffer.is_some()); + let bytes_per_index = match self.format { + wgt::IndexFormat::Uint16 => 2, + wgt::IndexFormat::Uint32 => 4, + }; + ((self.range.end - self.range.start) / bytes_per_index) as u32 + } + + fn flush(&mut self) -> Option { + if self.is_dirty { + self.is_dirty = false; + Some(RenderCommand::SetIndexBuffer { + buffer_id: self.buffer.unwrap(), + offset: self.range.start, + size: wgt::BufferSize::new(self.range.end - self.range.start), + }) + } else { + None + } + } + + fn set_format(&mut self, format: wgt::IndexFormat) { + if self.format != format { + self.format = format; + self.is_dirty = true; + } + } + + fn set_buffer(&mut self, id: id::BufferId, range: Range) { + self.buffer = Some(id); + self.range = range; + self.is_dirty = true; + } +} + +#[derive(Debug)] +struct VertexState { + buffer: Option, + range: Range, + stride: wgt::BufferAddress, + rate: wgt::InputStepMode, + is_dirty: bool, +} + +impl VertexState { + fn new() -> Self { + Self { + buffer: None, + range: 0..0, + stride: 0, + rate: wgt::InputStepMode::Vertex, + is_dirty: false, + } + } + + fn set_buffer(&mut self, buffer_id: id::BufferId, range: Range) { + self.buffer = Some(buffer_id); + self.range = range; + self.is_dirty = true; + } + + fn flush(&mut self, slot: u32) -> Option { + if self.is_dirty { + self.is_dirty = false; + Some(RenderCommand::SetVertexBuffer { + slot, + buffer_id: self.buffer.unwrap(), + offset: self.range.start, + size: 
wgt::BufferSize::new(self.range.end - self.range.start), + }) + } else { + None + } + } +} + +#[derive(Debug)] +struct BindState { + bind_group: Option<(id::BindGroupId, id::BindGroupLayoutId)>, + dynamic_offsets: Range, + is_dirty: bool, +} + +impl BindState { + fn new() -> Self { + Self { + bind_group: None, + dynamic_offsets: 0..0, + is_dirty: false, + } + } + + fn set_group( + &mut self, + bind_group_id: id::BindGroupId, + layout_id: id::BindGroupLayoutId, + dyn_offset: usize, + dyn_count: usize, + ) -> bool { + match self.bind_group { + Some((bg_id, _)) if bg_id == bind_group_id && dyn_count == 0 => false, + _ => { + self.bind_group = Some((bind_group_id, layout_id)); + self.dynamic_offsets = dyn_offset..dyn_offset + dyn_count; + self.is_dirty = true; + true + } + } + } +} + +#[derive(Debug)] +struct PushConstantState { + ranges: ArrayVec<[wgt::PushConstantRange; SHADER_STAGE_COUNT]>, + is_dirty: bool, +} +impl PushConstantState { + fn new() -> Self { + Self { + ranges: ArrayVec::new(), + is_dirty: false, + } + } + + fn set_push_constants(&mut self, new_ranges: &[wgt::PushConstantRange]) -> bool { + if &*self.ranges != new_ranges { + self.ranges = new_ranges.iter().cloned().collect(); + self.is_dirty = true; + true + } else { + false + } + } +} + +#[derive(Debug)] +struct State { + trackers: TrackerSet, + index: IndexState, + vertex: ArrayVec<[VertexState; MAX_VERTEX_BUFFERS]>, + bind: ArrayVec<[BindState; MAX_BIND_GROUPS]>, + push_constant_ranges: PushConstantState, + raw_dynamic_offsets: Vec, + flat_dynamic_offsets: Vec, + used_bind_groups: usize, + pipeline: StateChange, +} + +impl State { + fn vertex_limits(&self) -> (u32, u32) { + let mut vertex_limit = !0; + let mut instance_limit = !0; + for vbs in &self.vertex { + if vbs.stride == 0 { + continue; + } + let limit = ((vbs.range.end - vbs.range.start) / vbs.stride) as u32; + match vbs.rate { + wgt::InputStepMode::Vertex => vertex_limit = vertex_limit.min(limit), + wgt::InputStepMode::Instance => 
instance_limit = instance_limit.min(limit), + } + } + (vertex_limit, instance_limit) + } + + fn invalidate_group_from(&mut self, slot: usize) { + for bind in self.bind[slot..].iter_mut() { + if bind.bind_group.is_some() { + bind.is_dirty = true; + } + } + } + + fn set_bind_group( + &mut self, + slot: u8, + bind_group_id: id::BindGroupId, + layout_id: id::Valid, + offsets: &[wgt::DynamicOffset], + ) { + if self.bind[slot as usize].set_group( + bind_group_id, + layout_id.0, + self.raw_dynamic_offsets.len(), + offsets.len(), + ) { + self.invalidate_group_from(slot as usize + 1); + } + self.raw_dynamic_offsets.extend(offsets); + } + + fn set_pipeline( + &mut self, + index_format: wgt::IndexFormat, + vertex_strides: &[(wgt::BufferAddress, wgt::InputStepMode)], + layout_ids: &[id::Valid], + push_constant_layouts: &[wgt::PushConstantRange], + ) { + self.index.set_format(index_format); + for (vs, &(stride, step_mode)) in self.vertex.iter_mut().zip(vertex_strides) { + if vs.stride != stride || vs.rate != step_mode { + vs.stride = stride; + vs.rate = step_mode; + vs.is_dirty = true; + } + } + + let push_constants_changed = self + .push_constant_ranges + .set_push_constants(push_constant_layouts); + + self.used_bind_groups = layout_ids.len(); + let invalid_from = if push_constants_changed { + Some(0) + } else { + self.bind + .iter() + .zip(layout_ids) + .position(|(bs, layout_id)| match bs.bind_group { + Some((_, bgl_id)) => bgl_id != layout_id.0, + None => false, + }) + }; + if let Some(slot) = invalid_from { + self.invalidate_group_from(slot); + } + } + + fn flush_push_constants(&mut self) -> Option> { + let is_dirty = self.push_constant_ranges.is_dirty; + + if is_dirty { + let nonoverlapping_ranges = + super::bind::compute_nonoverlapping_ranges(&self.push_constant_ranges.ranges); + + Some( + nonoverlapping_ranges + .into_iter() + .map(|range| RenderCommand::SetPushConstant { + stages: range.stages, + offset: range.range.start, + size_bytes: range.range.end - 
range.range.start, + values_offset: None, + }), + ) + } else { + None + } + } + + fn flush_vertices(&mut self) -> impl Iterator + '_ { + self.vertex + .iter_mut() + .enumerate() + .flat_map(|(i, vs)| vs.flush(i as u32)) + } + + fn flush_binds(&mut self) -> impl Iterator + '_ { + for bs in self.bind[..self.used_bind_groups].iter() { + if bs.is_dirty { + self.flat_dynamic_offsets + .extend_from_slice(&self.raw_dynamic_offsets[bs.dynamic_offsets.clone()]); + } + } + self.bind + .iter_mut() + .take(self.used_bind_groups) + .enumerate() + .flat_map(|(i, bs)| { + if bs.is_dirty { + bs.is_dirty = false; + Some(RenderCommand::SetBindGroup { + index: i as u8, + bind_group_id: bs.bind_group.unwrap().0, + num_dynamic_offsets: (bs.dynamic_offsets.end - bs.dynamic_offsets.start) + as u8, + }) + } else { + None + } + }) + } +} + +/// Error encountered when finishing recording a render bundle. +#[derive(Clone, Debug, Error)] +pub(super) enum RenderBundleErrorInner { + #[error(transparent)] + Device(#[from] DeviceError), + #[error(transparent)] + RenderCommand(RenderCommandError), + #[error(transparent)] + ResourceUsageConflict(#[from] UsageConflict), + #[error(transparent)] + Draw(#[from] DrawError), +} + +impl From for RenderBundleErrorInner +where + T: Into, +{ + fn from(t: T) -> Self { + Self::RenderCommand(t.into()) + } +} + +/// Error encountered when finishing recording a render bundle. 
+#[derive(Clone, Debug, Error)] +#[error("{scope}")] +pub struct RenderBundleError { + pub scope: PassErrorScope, + #[source] + inner: RenderBundleErrorInner, +} + +impl RenderBundleError { + pub(crate) const INVALID_DEVICE: Self = RenderBundleError { + scope: PassErrorScope::Bundle, + inner: RenderBundleErrorInner::Device(DeviceError::Invalid), + }; +} + +impl MapPassErr for Result +where + E: Into, +{ + fn map_pass_err(self, scope: PassErrorScope) -> Result { + self.map_err(|inner| RenderBundleError { + scope, + inner: inner.into(), + }) + } +} + +pub mod bundle_ffi { + use super::{RenderBundleEncoder, RenderCommand}; + use crate::{id, span, RawString}; + use std::{convert::TryInto, slice}; + use wgt::{BufferAddress, BufferSize, DynamicOffset}; + + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given pointer is + /// valid for `offset_length` elements. + // TODO: There might be other safety issues, such as using the unsafe + // `RawPass::encode` and `RawPass::encode_slice`. 
+ #[no_mangle] + pub unsafe extern "C" fn wgpu_render_bundle_set_bind_group( + bundle: &mut RenderBundleEncoder, + index: u32, + bind_group_id: id::BindGroupId, + offsets: *const DynamicOffset, + offset_length: usize, + ) { + span!(_guard, DEBUG, "RenderBundle::set_bind_group"); + bundle.base.commands.push(RenderCommand::SetBindGroup { + index: index.try_into().unwrap(), + num_dynamic_offsets: offset_length.try_into().unwrap(), + bind_group_id, + }); + bundle + .base + .dynamic_offsets + .extend_from_slice(slice::from_raw_parts(offsets, offset_length)); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_bundle_set_pipeline( + bundle: &mut RenderBundleEncoder, + pipeline_id: id::RenderPipelineId, + ) { + span!(_guard, DEBUG, "RenderBundle::set_pipeline"); + bundle + .base + .commands + .push(RenderCommand::SetPipeline(pipeline_id)); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_bundle_set_index_buffer( + bundle: &mut RenderBundleEncoder, + buffer_id: id::BufferId, + offset: BufferAddress, + size: Option, + ) { + span!(_guard, DEBUG, "RenderBundle::set_index_buffer"); + bundle.base.commands.push(RenderCommand::SetIndexBuffer { + buffer_id, + offset, + size, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_bundle_set_vertex_buffer( + bundle: &mut RenderBundleEncoder, + slot: u32, + buffer_id: id::BufferId, + offset: BufferAddress, + size: Option, + ) { + span!(_guard, DEBUG, "RenderBundle::set_vertex_buffer"); + bundle.base.commands.push(RenderCommand::SetVertexBuffer { + slot, + buffer_id, + offset, + size, + }); + } + + #[no_mangle] + pub unsafe extern "C" fn wgpu_render_bundle_set_push_constants( + pass: &mut RenderBundleEncoder, + stages: wgt::ShaderStage, + offset: u32, + size_bytes: u32, + data: *const u8, + ) { + span!(_guard, DEBUG, "RenderBundle::set_push_constants"); + assert_eq!( + offset & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), + 0, + "Push constant offset must be aligned to 4 bytes." 
+ ); + assert_eq!( + size_bytes & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), + 0, + "Push constant size must be aligned to 4 bytes." + ); + let data_slice = slice::from_raw_parts(data, size_bytes as usize); + let value_offset = pass.base.push_constant_data.len().try_into().expect( + "Ran out of push constant space. Don't set 4gb of push constants per RenderBundle.", + ); + + pass.base.push_constant_data.extend( + data_slice + .chunks_exact(wgt::PUSH_CONSTANT_ALIGNMENT as usize) + .map(|arr| u32::from_ne_bytes([arr[0], arr[1], arr[2], arr[3]])), + ); + + pass.base.commands.push(RenderCommand::SetPushConstant { + stages, + offset, + size_bytes, + values_offset: Some(value_offset), + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_bundle_draw( + bundle: &mut RenderBundleEncoder, + vertex_count: u32, + instance_count: u32, + first_vertex: u32, + first_instance: u32, + ) { + span!(_guard, DEBUG, "RenderBundle::draw"); + bundle.base.commands.push(RenderCommand::Draw { + vertex_count, + instance_count, + first_vertex, + first_instance, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_bundle_draw_indexed( + bundle: &mut RenderBundleEncoder, + index_count: u32, + instance_count: u32, + first_index: u32, + base_vertex: i32, + first_instance: u32, + ) { + span!(_guard, DEBUG, "RenderBundle::draw_indexed"); + bundle.base.commands.push(RenderCommand::DrawIndexed { + index_count, + instance_count, + first_index, + base_vertex, + first_instance, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_bundle_draw_indirect( + bundle: &mut RenderBundleEncoder, + buffer_id: id::BufferId, + offset: BufferAddress, + ) { + span!(_guard, DEBUG, "RenderBundle::draw_indirect"); + bundle.base.commands.push(RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count: None, + indexed: false, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_render_pass_bundle_indexed_indirect( + bundle: &mut RenderBundleEncoder, + buffer_id: id::BufferId, + offset: BufferAddress, + ) { 
+ span!(_guard, DEBUG, "RenderBundle::draw_indexed_indirect"); + bundle.base.commands.push(RenderCommand::MultiDrawIndirect { + buffer_id, + offset, + count: None, + indexed: true, + }); + } + + #[no_mangle] + pub unsafe extern "C" fn wgpu_render_bundle_push_debug_group( + _bundle: &mut RenderBundleEncoder, + _label: RawString, + ) { + span!(_guard, DEBUG, "RenderBundle::push_debug_group"); + //TODO + } + + #[no_mangle] + pub unsafe extern "C" fn wgpu_render_bundle_pop_debug_group(_bundle: &mut RenderBundleEncoder) { + span!(_guard, DEBUG, "RenderBundle::pop_debug_group"); + //TODO + } + + #[no_mangle] + pub unsafe extern "C" fn wgpu_render_bundle_insert_debug_marker( + _bundle: &mut RenderBundleEncoder, + _label: RawString, + ) { + span!(_guard, DEBUG, "RenderBundle::insert_debug_marker"); + //TODO + } +} diff --git a/gfx/wgpu/wgpu-core/src/command/compute.rs b/gfx/wgpu/wgpu-core/src/command/compute.rs new file mode 100644 index 0000000000..1ab9df2516 --- /dev/null +++ b/gfx/wgpu/wgpu-core/src/command/compute.rs @@ -0,0 +1,657 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +use crate::{ + binding_model::{BindError, BindGroup, PushConstantUploadError}, + command::{ + bind::{Binder, LayoutChange}, + BasePass, BasePassRef, CommandBuffer, CommandEncoderError, MapPassErr, PassErrorScope, + StateChange, + }, + hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token}, + id, + resource::{Buffer, BufferUse, Texture}, + span, + track::{TrackerSet, UsageConflict}, + validation::{check_buffer_usage, MissingBufferUsageError}, + MAX_BIND_GROUPS, +}; + +use arrayvec::ArrayVec; +use hal::command::CommandBuffer as _; +use thiserror::Error; +use wgt::{BufferAddress, BufferUsage, ShaderStage}; + +use std::{fmt, iter, str}; + +#[doc(hidden)] +#[derive(Clone, Copy, Debug)] +#[cfg_attr( + any(feature = "serial-pass", feature = "trace"), + derive(serde::Serialize) +)] +#[cfg_attr( + any(feature = "serial-pass", feature = "replay"), + derive(serde::Deserialize) +)] +pub enum ComputeCommand { + SetBindGroup { + index: u8, + num_dynamic_offsets: u8, + bind_group_id: id::BindGroupId, + }, + SetPipeline(id::ComputePipelineId), + SetPushConstant { + offset: u32, + size_bytes: u32, + values_offset: u32, + }, + Dispatch([u32; 3]), + DispatchIndirect { + buffer_id: id::BufferId, + offset: BufferAddress, + }, + PushDebugGroup { + color: u32, + len: usize, + }, + PopDebugGroup, + InsertDebugMarker { + color: u32, + len: usize, + }, +} + +#[cfg_attr(feature = "serial-pass", derive(serde::Deserialize, serde::Serialize))] +pub struct ComputePass { + base: BasePass, + parent_id: id::CommandEncoderId, +} + +impl ComputePass { + pub fn new(parent_id: id::CommandEncoderId) -> Self { + Self { + base: BasePass::new(), + parent_id, + } + } + + pub fn parent_id(&self) -> id::CommandEncoderId { + self.parent_id + } + + #[cfg(feature = "trace")] + pub fn into_command(self) -> crate::device::trace::Command { + crate::device::trace::Command::RunComputePass { base: self.base } + } +} + +impl fmt::Debug for ComputePass { + fn fmt(&self, f: &mut fmt::Formatter<'_>) 
-> fmt::Result { + write!( + f, + "ComputePass {{ encoder_id: {:?}, data: {:?} commands and {:?} dynamic offsets }}", + self.parent_id, + self.base.commands.len(), + self.base.dynamic_offsets.len() + ) + } +} + +#[repr(C)] +#[derive(Clone, Debug, Default)] +pub struct ComputePassDescriptor { + pub todo: u32, +} + +#[derive(Clone, Debug, Error, PartialEq)] +pub enum DispatchError { + #[error("compute pipeline must be set")] + MissingPipeline, + #[error("current compute pipeline has a layout which is incompatible with a currently set bind group, first differing at entry index {index}")] + IncompatibleBindGroup { + index: u32, + //expected: BindGroupLayoutId, + //provided: Option<(BindGroupLayoutId, BindGroupId)>, + }, +} + +/// Error encountered when performing a compute pass. +#[derive(Clone, Debug, Error)] +pub enum ComputePassErrorInner { + #[error(transparent)] + Encoder(#[from] CommandEncoderError), + #[error("bind group {0:?} is invalid")] + InvalidBindGroup(id::BindGroupId), + #[error("bind group index {index} is greater than the device's requested `max_bind_group` limit {max}")] + BindGroupIndexOutOfRange { index: u8, max: u32 }, + #[error("compute pipeline {0:?} is invalid")] + InvalidPipeline(id::ComputePipelineId), + #[error("indirect buffer {0:?} is invalid or destroyed")] + InvalidIndirectBuffer(id::BufferId), + #[error(transparent)] + ResourceUsageConflict(#[from] UsageConflict), + #[error(transparent)] + MissingBufferUsage(#[from] MissingBufferUsageError), + #[error("cannot pop debug group, because number of pushed debug groups is zero")] + InvalidPopDebugGroup, + #[error(transparent)] + Dispatch(#[from] DispatchError), + #[error(transparent)] + Bind(#[from] BindError), + #[error(transparent)] + PushConstants(#[from] PushConstantUploadError), +} + +/// Error encountered when performing a compute pass. 
+#[derive(Clone, Debug, Error)] +#[error("{scope}")] +pub struct ComputePassError { + pub scope: PassErrorScope, + #[source] + inner: ComputePassErrorInner, +} + +impl MapPassErr for Result +where + E: Into, +{ + fn map_pass_err(self, scope: PassErrorScope) -> Result { + self.map_err(|inner| ComputePassError { + scope, + inner: inner.into(), + }) + } +} + +#[derive(Debug)] +struct State { + binder: Binder, + pipeline: StateChange, + trackers: TrackerSet, + debug_scope_depth: u32, +} + +impl State { + fn is_ready(&self) -> Result<(), DispatchError> { + //TODO: vertex buffers + let bind_mask = self.binder.invalid_mask(); + if bind_mask != 0 { + //let (expected, provided) = self.binder.entries[index as usize].info(); + return Err(DispatchError::IncompatibleBindGroup { + index: bind_mask.trailing_zeros(), + }); + } + if self.pipeline.is_unset() { + return Err(DispatchError::MissingPipeline); + } + Ok(()) + } + + fn flush_states( + &mut self, + raw_cmd_buf: &mut B::CommandBuffer, + base_trackers: &mut TrackerSet, + bind_group_guard: &Storage, id::BindGroupId>, + buffer_guard: &Storage, id::BufferId>, + texture_guard: &Storage, id::TextureId>, + ) -> Result<(), UsageConflict> { + for id in self.binder.list_active() { + self.trackers.merge_extend(&bind_group_guard[id].used)?; + } + + tracing::trace!("Encoding dispatch barriers"); + + CommandBuffer::insert_barriers( + raw_cmd_buf, + base_trackers, + &self.trackers, + buffer_guard, + texture_guard, + ); + + self.trackers.clear(); + Ok(()) + } +} + +// Common routines between render/compute + +impl Global { + pub fn command_encoder_run_compute_pass( + &self, + encoder_id: id::CommandEncoderId, + pass: &ComputePass, + ) -> Result<(), ComputePassError> { + self.command_encoder_run_compute_pass_impl::(encoder_id, pass.base.as_ref()) + } + + #[doc(hidden)] + pub fn command_encoder_run_compute_pass_impl( + &self, + encoder_id: id::CommandEncoderId, + mut base: BasePassRef, + ) -> Result<(), ComputePassError> { + span!(_guard, 
INFO, "CommandEncoder::run_compute_pass"); + let scope = PassErrorScope::Pass(encoder_id); + + let hub = B::hub(self); + let mut token = Token::root(); + + let (mut cmd_buf_guard, mut token) = hub.command_buffers.write(&mut token); + let cmd_buf = + CommandBuffer::get_encoder(&mut *cmd_buf_guard, encoder_id).map_pass_err(scope)?; + let raw = cmd_buf.raw.last_mut().unwrap(); + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf.commands { + list.push(crate::device::trace::Command::RunComputePass { + base: BasePass::from_ref(base), + }); + } + + let (_, mut token) = hub.render_bundles.read(&mut token); + let (pipeline_layout_guard, mut token) = hub.pipeline_layouts.read(&mut token); + let (bind_group_guard, mut token) = hub.bind_groups.read(&mut token); + let (pipeline_guard, mut token) = hub.compute_pipelines.read(&mut token); + let (buffer_guard, mut token) = hub.buffers.read(&mut token); + let (texture_guard, _) = hub.textures.read(&mut token); + + let mut state = State { + binder: Binder::new(cmd_buf.limits.max_bind_groups), + pipeline: StateChange::new(), + trackers: TrackerSet::new(B::VARIANT), + debug_scope_depth: 0, + }; + let mut temp_offsets = Vec::new(); + + for command in base.commands { + match *command { + ComputeCommand::SetBindGroup { + index, + num_dynamic_offsets, + bind_group_id, + } => { + let scope = PassErrorScope::SetBindGroup(bind_group_id); + + let max_bind_groups = cmd_buf.limits.max_bind_groups; + if (index as u32) >= max_bind_groups { + return Err(ComputePassErrorInner::BindGroupIndexOutOfRange { + index, + max: max_bind_groups, + }) + .map_pass_err(scope); + } + + temp_offsets.clear(); + temp_offsets + .extend_from_slice(&base.dynamic_offsets[..num_dynamic_offsets as usize]); + base.dynamic_offsets = &base.dynamic_offsets[num_dynamic_offsets as usize..]; + + let bind_group = cmd_buf + .trackers + .bind_groups + .use_extend(&*bind_group_guard, bind_group_id, (), ()) + .map_err(|_| 
ComputePassErrorInner::InvalidBindGroup(bind_group_id)) + .map_pass_err(scope)?; + bind_group + .validate_dynamic_bindings(&temp_offsets) + .map_pass_err(scope)?; + + if let Some((pipeline_layout_id, follow_ups)) = state.binder.provide_entry( + index as usize, + id::Valid(bind_group_id), + bind_group, + &temp_offsets, + ) { + let bind_groups = iter::once(bind_group.raw.raw()) + .chain( + follow_ups + .clone() + .map(|(bg_id, _)| bind_group_guard[bg_id].raw.raw()), + ) + .collect::>(); + temp_offsets.extend(follow_ups.flat_map(|(_, offsets)| offsets)); + unsafe { + raw.bind_compute_descriptor_sets( + &pipeline_layout_guard[pipeline_layout_id].raw, + index as usize, + bind_groups, + &temp_offsets, + ); + } + } + } + ComputeCommand::SetPipeline(pipeline_id) => { + let scope = PassErrorScope::SetPipelineCompute(pipeline_id); + + if state.pipeline.set_and_check_redundant(pipeline_id) { + continue; + } + + let pipeline = cmd_buf + .trackers + .compute_pipes + .use_extend(&*pipeline_guard, pipeline_id, (), ()) + .map_err(|_| ComputePassErrorInner::InvalidPipeline(pipeline_id)) + .map_pass_err(scope)?; + + unsafe { + raw.bind_compute_pipeline(&pipeline.raw); + } + + // Rebind resources + if state.binder.pipeline_layout_id != Some(pipeline.layout_id.value) { + let pipeline_layout = &pipeline_layout_guard[pipeline.layout_id.value]; + + state.binder.change_pipeline_layout( + &*pipeline_layout_guard, + pipeline.layout_id.value, + ); + + let mut is_compatible = true; + + for (index, (entry, &bgl_id)) in state + .binder + .entries + .iter_mut() + .zip(&pipeline_layout.bind_group_layout_ids) + .enumerate() + { + match entry.expect_layout(bgl_id) { + LayoutChange::Match(bg_id, offsets) if is_compatible => { + let desc_set = bind_group_guard[bg_id].raw.raw(); + unsafe { + raw.bind_compute_descriptor_sets( + &pipeline_layout.raw, + index, + iter::once(desc_set), + offsets.iter().cloned(), + ); + } + } + LayoutChange::Match(..) 
| LayoutChange::Unchanged => {} + LayoutChange::Mismatch => { + is_compatible = false; + } + } + } + + // Clear push constant ranges + let non_overlapping = super::bind::compute_nonoverlapping_ranges( + &pipeline_layout.push_constant_ranges, + ); + for range in non_overlapping { + let offset = range.range.start; + let size_bytes = range.range.end - offset; + super::push_constant_clear( + offset, + size_bytes, + |clear_offset, clear_data| unsafe { + raw.push_compute_constants( + &pipeline_layout.raw, + clear_offset, + clear_data, + ); + }, + ); + } + } + } + ComputeCommand::SetPushConstant { + offset, + size_bytes, + values_offset, + } => { + let scope = PassErrorScope::SetPushConstant; + + let end_offset_bytes = offset + size_bytes; + let values_end_offset = + (values_offset + size_bytes / wgt::PUSH_CONSTANT_ALIGNMENT) as usize; + let data_slice = + &base.push_constant_data[(values_offset as usize)..values_end_offset]; + + let pipeline_layout_id = state + .binder + .pipeline_layout_id + //TODO: don't error here, lazily update the push constants + .ok_or(ComputePassErrorInner::Dispatch( + DispatchError::MissingPipeline, + )) + .map_pass_err(scope)?; + let pipeline_layout = &pipeline_layout_guard[pipeline_layout_id]; + + pipeline_layout + .validate_push_constant_ranges( + ShaderStage::COMPUTE, + offset, + end_offset_bytes, + ) + .map_pass_err(scope)?; + + unsafe { raw.push_compute_constants(&pipeline_layout.raw, offset, data_slice) } + } + ComputeCommand::Dispatch(groups) => { + let scope = PassErrorScope::Dispatch; + + state.is_ready().map_pass_err(scope)?; + state + .flush_states( + raw, + &mut cmd_buf.trackers, + &*bind_group_guard, + &*buffer_guard, + &*texture_guard, + ) + .map_pass_err(scope)?; + unsafe { + raw.dispatch(groups); + } + } + ComputeCommand::DispatchIndirect { buffer_id, offset } => { + let scope = PassErrorScope::DispatchIndirect; + + state.is_ready().map_pass_err(scope)?; + + let indirect_buffer = state + .trackers + .buffers + 
.use_extend(&*buffer_guard, buffer_id, (), BufferUse::INDIRECT) + .map_err(|_| ComputePassErrorInner::InvalidIndirectBuffer(buffer_id)) + .map_pass_err(scope)?; + check_buffer_usage(indirect_buffer.usage, BufferUsage::INDIRECT) + .map_pass_err(scope)?; + let &(ref buf_raw, _) = indirect_buffer + .raw + .as_ref() + .ok_or(ComputePassErrorInner::InvalidIndirectBuffer(buffer_id)) + .map_pass_err(scope)?; + + state + .flush_states( + raw, + &mut cmd_buf.trackers, + &*bind_group_guard, + &*buffer_guard, + &*texture_guard, + ) + .map_pass_err(scope)?; + unsafe { + raw.dispatch_indirect(buf_raw, offset); + } + } + ComputeCommand::PushDebugGroup { color, len } => { + state.debug_scope_depth += 1; + + let label = str::from_utf8(&base.string_data[..len]).unwrap(); + unsafe { + raw.begin_debug_marker(label, color); + } + base.string_data = &base.string_data[len..]; + } + ComputeCommand::PopDebugGroup => { + let scope = PassErrorScope::PopDebugGroup; + + if state.debug_scope_depth == 0 { + return Err(ComputePassErrorInner::InvalidPopDebugGroup) + .map_pass_err(scope); + } + state.debug_scope_depth -= 1; + unsafe { + raw.end_debug_marker(); + } + } + ComputeCommand::InsertDebugMarker { color, len } => { + let label = str::from_utf8(&base.string_data[..len]).unwrap(); + unsafe { raw.insert_debug_marker(label, color) } + base.string_data = &base.string_data[len..]; + } + } + } + + Ok(()) + } +} + +pub mod compute_ffi { + use super::{ComputeCommand, ComputePass}; + use crate::{id, span, RawString}; + use std::{convert::TryInto, ffi, slice}; + use wgt::{BufferAddress, DynamicOffset}; + + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given pointer is + /// valid for `offset_length` elements. + // TODO: There might be other safety issues, such as using the unsafe + // `RawPass::encode` and `RawPass::encode_slice`. 
+ #[no_mangle] + pub unsafe extern "C" fn wgpu_compute_pass_set_bind_group( + pass: &mut ComputePass, + index: u32, + bind_group_id: id::BindGroupId, + offsets: *const DynamicOffset, + offset_length: usize, + ) { + span!(_guard, DEBUG, "ComputePass::set_bind_group"); + pass.base.commands.push(ComputeCommand::SetBindGroup { + index: index.try_into().unwrap(), + num_dynamic_offsets: offset_length.try_into().unwrap(), + bind_group_id, + }); + pass.base + .dynamic_offsets + .extend_from_slice(slice::from_raw_parts(offsets, offset_length)); + } + + #[no_mangle] + pub extern "C" fn wgpu_compute_pass_set_pipeline( + pass: &mut ComputePass, + pipeline_id: id::ComputePipelineId, + ) { + span!(_guard, DEBUG, "ComputePass::set_pipeline"); + pass.base + .commands + .push(ComputeCommand::SetPipeline(pipeline_id)); + } + + #[no_mangle] + pub unsafe extern "C" fn wgpu_compute_pass_set_push_constant( + pass: &mut ComputePass, + offset: u32, + size_bytes: u32, + data: *const u8, + ) { + span!(_guard, DEBUG, "ComputePass::set_push_constant"); + assert_eq!( + offset & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), + 0, + "Push constant offset must be aligned to 4 bytes." + ); + assert_eq!( + size_bytes & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), + 0, + "Push constant size must be aligned to 4 bytes." + ); + let data_slice = slice::from_raw_parts(data, size_bytes as usize); + let value_offset = pass.base.push_constant_data.len().try_into().expect( + "Ran out of push constant space. 
Don't set 4gb of push constants per ComputePass.", + ); + + pass.base.push_constant_data.extend( + data_slice + .chunks_exact(wgt::PUSH_CONSTANT_ALIGNMENT as usize) + .map(|arr| u32::from_ne_bytes([arr[0], arr[1], arr[2], arr[3]])), + ); + + pass.base.commands.push(ComputeCommand::SetPushConstant { + offset, + size_bytes, + values_offset: value_offset, + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_compute_pass_dispatch( + pass: &mut ComputePass, + groups_x: u32, + groups_y: u32, + groups_z: u32, + ) { + span!(_guard, DEBUG, "ComputePass::dispatch"); + pass.base + .commands + .push(ComputeCommand::Dispatch([groups_x, groups_y, groups_z])); + } + + #[no_mangle] + pub extern "C" fn wgpu_compute_pass_dispatch_indirect( + pass: &mut ComputePass, + buffer_id: id::BufferId, + offset: BufferAddress, + ) { + span!(_guard, DEBUG, "ComputePass::dispatch_indirect"); + pass.base + .commands + .push(ComputeCommand::DispatchIndirect { buffer_id, offset }); + } + + #[no_mangle] + pub unsafe extern "C" fn wgpu_compute_pass_push_debug_group( + pass: &mut ComputePass, + label: RawString, + color: u32, + ) { + span!(_guard, DEBUG, "ComputePass::push_debug_group"); + let bytes = ffi::CStr::from_ptr(label).to_bytes(); + pass.base.string_data.extend_from_slice(bytes); + + pass.base.commands.push(ComputeCommand::PushDebugGroup { + color, + len: bytes.len(), + }); + } + + #[no_mangle] + pub extern "C" fn wgpu_compute_pass_pop_debug_group(pass: &mut ComputePass) { + span!(_guard, DEBUG, "ComputePass::pop_debug_group"); + pass.base.commands.push(ComputeCommand::PopDebugGroup); + } + + #[no_mangle] + pub unsafe extern "C" fn wgpu_compute_pass_insert_debug_marker( + pass: &mut ComputePass, + label: RawString, + color: u32, + ) { + span!(_guard, DEBUG, "ComputePass::insert_debug_marker"); + let bytes = ffi::CStr::from_ptr(label).to_bytes(); + pass.base.string_data.extend_from_slice(bytes); + + pass.base.commands.push(ComputeCommand::InsertDebugMarker { + color, + len: bytes.len(), + }); + 
} +} diff --git a/gfx/wgpu/wgpu-core/src/command/draw.rs b/gfx/wgpu/wgpu-core/src/command/draw.rs new file mode 100644 index 0000000000..30c19fef7f --- /dev/null +++ b/gfx/wgpu/wgpu-core/src/command/draw.rs @@ -0,0 +1,180 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/*! Draw structures - shared between render passes and bundles. +!*/ + +use crate::{ + binding_model::PushConstantUploadError, + id, + resource::BufferUse, + track::UseExtendError, + validation::{MissingBufferUsageError, MissingTextureUsageError}, +}; +use wgt::{BufferAddress, BufferSize, Color}; + +use std::num::NonZeroU32; +use thiserror::Error; + +pub type BufferError = UseExtendError; + +/// Error validating a draw call. +#[derive(Clone, Debug, Error, PartialEq)] +pub enum DrawError { + #[error("blend color needs to be set")] + MissingBlendColor, + #[error("render pipeline must be set")] + MissingPipeline, + #[error("current render pipeline has a layout which is incompatible with a currently set bind group, first differing at entry index {index}")] + IncompatibleBindGroup { + index: u32, + //expected: BindGroupLayoutId, + //provided: Option<(BindGroupLayoutId, BindGroupId)>, + }, + #[error("vertex {last_vertex} extends beyond limit {vertex_limit}")] + VertexBeyondLimit { last_vertex: u32, vertex_limit: u32 }, + #[error("instance {last_instance} extends beyond limit {instance_limit}")] + InstanceBeyondLimit { + last_instance: u32, + instance_limit: u32, + }, + #[error("index {last_index} extends beyond limit {index_limit}")] + IndexBeyondLimit { last_index: u32, index_limit: u32 }, +} + +/// Error encountered when encoding a render command. +/// This is the shared error set between render bundles and passes. 
+#[derive(Clone, Debug, Error)] +pub enum RenderCommandError { + #[error("bind group {0:?} is invalid")] + InvalidBindGroup(id::BindGroupId), + #[error("bind group index {index} is greater than the device's requested `max_bind_group` limit {max}")] + BindGroupIndexOutOfRange { index: u8, max: u32 }, + #[error("dynamic buffer offset {0} does not respect `BIND_BUFFER_ALIGNMENT`")] + UnalignedBufferOffset(u64), + #[error("number of buffer offsets ({actual}) does not match the number of dynamic bindings ({expected})")] + InvalidDynamicOffsetCount { actual: usize, expected: usize }, + #[error("render pipeline {0:?} is invalid")] + InvalidPipeline(id::RenderPipelineId), + #[error("Render pipeline is incompatible with render pass")] + IncompatiblePipeline(#[from] crate::device::RenderPassCompatibilityError), + #[error("pipeline is not compatible with the depth-stencil read-only render pass")] + IncompatibleReadOnlyDepthStencil, + #[error("buffer {0:?} is in error {1:?}")] + Buffer(id::BufferId, BufferError), + #[error("buffer {0:?} is destroyed")] + DestroyedBuffer(id::BufferId), + #[error(transparent)] + MissingBufferUsage(#[from] MissingBufferUsageError), + #[error(transparent)] + MissingTextureUsage(#[from] MissingTextureUsageError), + #[error(transparent)] + PushConstants(#[from] PushConstantUploadError), + #[error("Invalid Viewport parameters")] + InvalidViewport, + #[error("Invalid ScissorRect parameters")] + InvalidScissorRect, +} + +#[derive(Clone, Copy, Debug, Default)] +#[cfg_attr( + any(feature = "serial-pass", feature = "trace"), + derive(serde::Serialize) +)] +#[cfg_attr( + any(feature = "serial-pass", feature = "replay"), + derive(serde::Deserialize) +)] +pub struct Rect { + pub x: T, + pub y: T, + pub w: T, + pub h: T, +} + +#[doc(hidden)] +#[derive(Clone, Copy, Debug)] +#[cfg_attr( + any(feature = "serial-pass", feature = "trace"), + derive(serde::Serialize) +)] +#[cfg_attr( + any(feature = "serial-pass", feature = "replay"), + derive(serde::Deserialize) 
+)] +pub enum RenderCommand { + SetBindGroup { + index: u8, + num_dynamic_offsets: u8, + bind_group_id: id::BindGroupId, + }, + SetPipeline(id::RenderPipelineId), + SetIndexBuffer { + buffer_id: id::BufferId, + offset: BufferAddress, + size: Option, + }, + SetVertexBuffer { + slot: u32, + buffer_id: id::BufferId, + offset: BufferAddress, + size: Option, + }, + SetBlendColor(Color), + SetStencilReference(u32), + SetViewport { + rect: Rect, + //TODO: use half-float to reduce the size? + depth_min: f32, + depth_max: f32, + }, + SetScissor(Rect), + SetPushConstant { + stages: wgt::ShaderStage, + offset: u32, + size_bytes: u32, + /// None means there is no data and the data should be an array of zeros. + /// + /// Facilitates clears in renderbundles which explicitly do their clears. + values_offset: Option, + }, + Draw { + vertex_count: u32, + instance_count: u32, + first_vertex: u32, + first_instance: u32, + }, + DrawIndexed { + index_count: u32, + instance_count: u32, + first_index: u32, + base_vertex: i32, + first_instance: u32, + }, + MultiDrawIndirect { + buffer_id: id::BufferId, + offset: BufferAddress, + /// Count of `None` represents a non-multi call. + count: Option, + indexed: bool, + }, + MultiDrawIndirectCount { + buffer_id: id::BufferId, + offset: BufferAddress, + count_buffer_id: id::BufferId, + count_buffer_offset: BufferAddress, + max_count: u32, + indexed: bool, + }, + PushDebugGroup { + color: u32, + len: usize, + }, + PopDebugGroup, + InsertDebugMarker { + color: u32, + len: usize, + }, + ExecuteBundle(id::RenderBundleId), +} diff --git a/gfx/wgpu/wgpu-core/src/command/mod.rs b/gfx/wgpu/wgpu-core/src/command/mod.rs new file mode 100644 index 0000000000..1093bde155 --- /dev/null +++ b/gfx/wgpu/wgpu-core/src/command/mod.rs @@ -0,0 +1,362 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/
+
+mod allocator;
+mod bind;
+mod bundle;
+mod compute;
+mod draw;
+mod render;
+mod transfer;
+
+pub(crate) use self::allocator::CommandAllocator;
+pub use self::allocator::CommandAllocatorError;
+pub use self::bundle::*;
+pub use self::compute::*;
+pub use self::draw::*;
+pub use self::render::*;
+pub use self::transfer::*;
+
+use crate::{
+    device::{all_buffer_stages, all_image_stages},
+    hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token},
+    id,
+    resource::{Buffer, Texture},
+    span,
+    track::TrackerSet,
+    Label, PrivateFeatures, Stored,
+};
+
+use hal::command::CommandBuffer as _;
+use thiserror::Error;
+
+use std::thread::ThreadId;
+
+/// Slice of 64 zeroed `u32` words.
+// NOTE(review): presumably the zero source used when a push-constant command
+// carries no data; its users are outside this part of the file.
+const PUSH_CONSTANT_CLEAR_ARRAY: &[u32] = &[0_u32; 64];
+
+/// Command buffer state for the backend `B`.
+#[derive(Debug)]
+pub struct CommandBuffer<B: hal::Backend> {
+    /// Raw backend command buffers.
+    pub(crate) raw: Vec<B::CommandBuffer>,
+    /// `get_encoder` only hands this buffer out while the flag is set.
+    is_recording: bool,
+    recorded_thread_id: ThreadId,
+    pub(crate) device_id: Stored<id::DeviceId>,
+    pub(crate) trackers: TrackerSet,
+    pub(crate) used_swap_chain: Option<(Stored<id::SwapChainId>, B::Framebuffer)>,
+    limits: wgt::Limits,
+    private_features: PrivateFeatures,
+    /// Only compiled in with the `trace` feature.
+    #[cfg(feature = "trace")]
+    pub(crate) commands: Option<Vec<crate::device::trace::Command>>,
+    /// Only stored in builds with debug assertions; see `Resource::label`.
+    #[cfg(debug_assertions)]
+    pub(crate) label: String,
+}
+
+impl<B: GfxBackend> CommandBuffer<B> {
+    /// Looks up the command buffer behind `id` for further encoding.
+    ///
+    /// # Errors
+    ///
+    /// * [`CommandEncoderError::Invalid`] if `storage.get_mut(id)` fails.
+    /// * [`CommandEncoderError::NotRecording`] if the buffer exists but its
+    ///   `is_recording` flag is cleared.
+    fn get_encoder(
+        storage: &mut Storage<Self, id::CommandEncoderId>,
+        id: id::CommandEncoderId,
+    ) -> Result<&mut Self, CommandEncoderError> {
+        match storage.get_mut(id) {
+            Ok(cmd_buf) if cmd_buf.is_recording => Ok(cmd_buf),
+            Ok(_) => Err(CommandEncoderError::NotRecording),
+            Err(_) => Err(CommandEncoderError::Invalid),
+        }
+    }
+
+    /// Merges the tracker state of `head` into `base` and records the
+    /// resulting buffer and texture transitions into `raw` with a single
+    /// `pipeline_barrier` call.
+    ///
+    /// Buffers and textures go through `merge_replace`, whose pending
+    /// transitions are converted with `into_hal` using the resources looked up
+    /// in `buffer_guard` / `texture_guard`. All other tracker kinds are merged
+    /// with `merge_extend`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if any `merge_extend` call returns an error (the results are
+    /// unwrapped) and, in debug builds, if either tracker set reports a
+    /// backend different from `B::VARIANT`.
+    pub(crate) fn insert_barriers(
+        raw: &mut B::CommandBuffer,
+        base: &mut TrackerSet,
+        head: &TrackerSet,
+        buffer_guard: &Storage<Buffer<B>, id::BufferId>,
+        texture_guard: &Storage<Texture<B>, id::TextureId>,
+    ) {
+        use hal::command::CommandBuffer as _;
+
+        debug_assert_eq!(B::VARIANT, base.backend());
+        debug_assert_eq!(B::VARIANT, head.backend());
+
+        let buffer_barriers = base.buffers.merge_replace(&head.buffers).map(|pending| {
+            let buf = &buffer_guard[pending.id];
+            pending.into_hal(buf)
+        });
+        let texture_barriers = base.textures.merge_replace(&head.textures).map(|pending| {
+            let tex = &texture_guard[pending.id];
+            pending.into_hal(tex)
+        });
+        base.views.merge_extend(&head.views).unwrap();
+        base.bind_groups.merge_extend(&head.bind_groups).unwrap();
+        base.samplers.merge_extend(&head.samplers).unwrap();
+        base.compute_pipes
+            .merge_extend(&head.compute_pipes)
+            .unwrap();
+        base.render_pipes.merge_extend(&head.render_pipes).unwrap();
+        base.bundles.merge_extend(&head.bundles).unwrap();
+
+        // The same stage mask is used for both ends of the barrier range.
+        let stages = all_buffer_stages() | all_image_stages();
+        // NOTE(review): `pipeline_barrier` is an unsafe hal call; the
+        // preconditions that make it sound are not visible in this block.
+        unsafe {
+            raw.pipeline_barrier(
+                stages..stages,
+                hal::memory::Dependencies::empty(),
+                buffer_barriers.chain(texture_barriers),
+            );
+        }
+    }
+}
+
+impl<B: hal::Backend> crate::hub::Resource for CommandBuffer<B> {
+    const TYPE: &'static str = "CommandBuffer";
+
+    /// Not supported for command buffers: calling this panics via `unreachable!()`.
+    fn life_guard(&self) -> &crate::LifeGuard {
+        unreachable!()
+    }
+
+    /// Returns the stored label in builds with debug assertions and an empty
+    /// string otherwise (the field does not exist in those builds).
+    fn label(&self) -> &str {
+        #[cfg(debug_assertions)]
+        return &self.label;
+        #[cfg(not(debug_assertions))]
+        return "";
+    }
+}
+
+/// Borrowed view of a pass's recorded data; the counterpart of [`BasePass`].
+#[derive(Copy, Clone, Debug)]
+pub struct BasePassRef<'a, C> {
+    pub commands: &'a [C],
+    pub dynamic_offsets: &'a [wgt::DynamicOffset],
+    pub string_data: &'a [u8],
+    pub push_constant_data: &'a [u32],
+}
+
+/// Owned storage for a pass: the command list of type `C` plus the side
+/// arrays (dynamic offsets, string bytes, push-constant words).
+///
+/// Serde support is only derived when the `serial-pass`, `trace` or `replay`
+/// features are enabled.
+#[doc(hidden)]
+#[derive(Debug)]
+#[cfg_attr(
+    any(feature = "serial-pass", feature = "trace"),
+    derive(serde::Serialize)
+)]
+#[cfg_attr(
+    any(feature = "serial-pass", feature = "replay"),
+    derive(serde::Deserialize)
+)]
+pub struct BasePass<C> {
+    pub commands: Vec<C>,
+    pub dynamic_offsets: Vec<wgt::DynamicOffset>,
+    pub string_data: Vec<u8>,
+    pub push_constant_data: Vec<u32>,
+}
+
+impl<C: Clone> BasePass<C> {
+    /// Creates a pass with all four vectors empty.
+    fn new() -> Self {
+        Self {
+            commands: Vec::new(),
+            dynamic_offsets: Vec::new(),
+            string_data: Vec::new(),
+            push_constant_data: Vec::new(),
+        }
+    }
+
+    /// Builds an owned pass by copying every slice of `base` (`to_vec`).
+    /// Only compiled in with the `trace` feature.
+    #[cfg(feature = "trace")]
+    fn from_ref(base: BasePassRef<C>) -> Self {
+        Self {
+            commands: base.commands.to_vec(),
+            dynamic_offsets: base.dynamic_offsets.to_vec(),
+            string_data: base.string_data.to_vec(),
+            push_constant_data: base.push_constant_data.to_vec(),
+        }
+    }
+
+    /// Returns a [`BasePassRef`] borrowing all four vectors of this pass.
+    pub fn as_ref(&self) -> BasePassRef<C> {
+        BasePassRef {
+            commands: &self.commands,
+            dynamic_offsets: &self.dynamic_offsets,
+            string_data: &self.string_data,
+            push_constant_data: &self.push_constant_data,
+        }
+    }
+}
+
+/// Errors returned when looking up a command encoder (see
+/// `CommandBuffer::get_encoder`).
+#[derive(Clone, Debug, Error)]
+pub enum CommandEncoderError {
+    #[error("command encoder is invalid")]
+    Invalid,
+    #[error("command encoder must be active")]
+    NotRecording,
+}
+
+impl Global {
+    pub fn command_encoder_finish(
+        &self,
+        encoder_id: id::CommandEncoderId,
+        _desc: &wgt::CommandBufferDescriptor