From 8dd16259287f58f9273002717ec4d27e97127719 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 12 Jun 2024 07:43:14 +0200 Subject: Merging upstream version 127.0. Signed-off-by: Daniel Baumann --- third_party/rust/wgpu-core/src/any_surface.rs | 95 ---- third_party/rust/wgpu-core/src/binding_model.rs | 2 - .../rust/wgpu-core/src/command/allocator.rs | 67 +++ third_party/rust/wgpu-core/src/command/bundle.rs | 21 +- third_party/rust/wgpu-core/src/command/clear.rs | 9 + third_party/rust/wgpu-core/src/command/compute.rs | 318 +++++------- .../rust/wgpu-core/src/command/compute_command.rs | 322 ++++++++++++ third_party/rust/wgpu-core/src/command/mod.rs | 249 ++++++++-- third_party/rust/wgpu-core/src/command/query.rs | 11 +- third_party/rust/wgpu-core/src/command/render.rs | 168 ++----- third_party/rust/wgpu-core/src/command/transfer.rs | 35 ++ .../rust/wgpu-core/src/device/any_device.rs | 2 +- third_party/rust/wgpu-core/src/device/bgl.rs | 2 +- third_party/rust/wgpu-core/src/device/global.rs | 77 +-- third_party/rust/wgpu-core/src/device/life.rs | 123 +++-- third_party/rust/wgpu-core/src/device/mod.rs | 37 -- third_party/rust/wgpu-core/src/device/queue.rs | 169 +++---- third_party/rust/wgpu-core/src/device/resource.rs | 544 ++++++++++++--------- third_party/rust/wgpu-core/src/global.rs | 10 +- third_party/rust/wgpu-core/src/hal_api.rs | 20 +- third_party/rust/wgpu-core/src/hub.rs | 36 +- third_party/rust/wgpu-core/src/id.rs | 47 +- third_party/rust/wgpu-core/src/identity.rs | 40 +- third_party/rust/wgpu-core/src/instance.rs | 357 ++++++++------ third_party/rust/wgpu-core/src/lib.rs | 4 +- third_party/rust/wgpu-core/src/lock/mod.rs | 41 ++ third_party/rust/wgpu-core/src/lock/rank.rs | 170 +++++++ third_party/rust/wgpu-core/src/lock/ranked.rs | 397 +++++++++++++++ third_party/rust/wgpu-core/src/lock/vanilla.rs | 121 +++++ third_party/rust/wgpu-core/src/pipeline.rs | 102 +--- third_party/rust/wgpu-core/src/pool.rs | 12 +- third_party/rust/wgpu-core/src/present.rs | 30 +- third_party/rust/wgpu-core/src/registry.rs | 89 +++- third_party/rust/wgpu-core/src/resource.rs | 74 ++- third_party/rust/wgpu-core/src/snatch.rs | 88 +++- third_party/rust/wgpu-core/src/storage.rs | 5 +- third_party/rust/wgpu-core/src/track/buffer.rs | 24 +- third_party/rust/wgpu-core/src/track/metadata.rs | 10 +- third_party/rust/wgpu-core/src/track/mod.rs | 45 +- third_party/rust/wgpu-core/src/track/stateless.rs | 22 +- third_party/rust/wgpu-core/src/track/texture.rs | 4 +- third_party/rust/wgpu-core/src/validation.rs | 3 +- 42 files changed, 2661 insertions(+), 1341 deletions(-) delete mode 100644 third_party/rust/wgpu-core/src/any_surface.rs create mode 100644 third_party/rust/wgpu-core/src/command/allocator.rs create mode 100644 third_party/rust/wgpu-core/src/command/compute_command.rs create mode 100644 third_party/rust/wgpu-core/src/lock/mod.rs create mode 100644 third_party/rust/wgpu-core/src/lock/rank.rs create mode 100644 third_party/rust/wgpu-core/src/lock/ranked.rs create mode 100644 third_party/rust/wgpu-core/src/lock/vanilla.rs (limited to 'third_party/rust/wgpu-core/src') diff --git a/third_party/rust/wgpu-core/src/any_surface.rs b/third_party/rust/wgpu-core/src/any_surface.rs deleted file mode 100644 index 94edfc4433..0000000000 --- a/third_party/rust/wgpu-core/src/any_surface.rs +++ /dev/null @@ -1,95 +0,0 @@ -use wgt::Backend; - -/// The `AnySurface` type: a `Arc` of a `A::Surface` for any backend `A`. 
-use crate::hal_api::HalApi; - -use std::fmt; -use std::mem::ManuallyDrop; -use std::ptr::NonNull; - -struct AnySurfaceVtable { - // We oppurtunistically store the backend here, since we now it will be used - // with backend selection and it can be stored in static memory. - backend: Backend, - // Drop glue which knows how to drop the stored data. - drop: unsafe fn(*mut ()), -} - -/// An `A::Surface`, for any backend `A`. -/// -/// Any `AnySurface` is just like an `A::Surface`, except that the `A` type -/// parameter is erased. To access the `Surface`, you must downcast to a -/// particular backend with the \[`downcast_ref`\] or \[`take`\] methods. -pub struct AnySurface { - data: NonNull<()>, - vtable: &'static AnySurfaceVtable, -} - -impl AnySurface { - /// Construct an `AnySurface` that owns an `A::Surface`. - pub fn new(surface: A::Surface) -> AnySurface { - unsafe fn drop_glue(ptr: *mut ()) { - unsafe { - _ = Box::from_raw(ptr.cast::()); - } - } - - let data = NonNull::from(Box::leak(Box::new(surface))); - - AnySurface { - data: data.cast(), - vtable: &AnySurfaceVtable { - backend: A::VARIANT, - drop: drop_glue::, - }, - } - } - - /// Get the backend this surface was created through. - pub fn backend(&self) -> Backend { - self.vtable.backend - } - - /// If `self` refers to an `A::Surface`, returns a reference to it. - pub fn downcast_ref(&self) -> Option<&A::Surface> { - if A::VARIANT != self.vtable.backend { - return None; - } - - // SAFETY: We just checked the instance above implicitly by the backend - // that it was statically constructed through. - Some(unsafe { &*self.data.as_ptr().cast::() }) - } - - /// If `self` is an `Arc`, returns that. - pub fn take(self) -> Option { - if A::VARIANT != self.vtable.backend { - return None; - } - - // Disable drop glue, since we're returning the owned surface. The - // caller will be responsible for dropping it. - let this = ManuallyDrop::new(self); - - // SAFETY: We just checked the instance above implicitly by the backend - // that it was statically constructed through. - Some(unsafe { *Box::from_raw(this.data.as_ptr().cast::()) }) - } -} - -impl Drop for AnySurface { - fn drop(&mut self) { - unsafe { (self.vtable.drop)(self.data.as_ptr()) } - } -} - -impl fmt::Debug for AnySurface { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "AnySurface<{}>", self.vtable.backend) - } -} - -#[cfg(send_sync)] -unsafe impl Send for AnySurface {} -#[cfg(send_sync)] -unsafe impl Sync for AnySurface {} diff --git a/third_party/rust/wgpu-core/src/binding_model.rs b/third_party/rust/wgpu-core/src/binding_model.rs index 8689af2ac1..bea63b0ba4 100644 --- a/third_party/rust/wgpu-core/src/binding_model.rs +++ b/third_party/rust/wgpu-core/src/binding_model.rs @@ -462,8 +462,6 @@ pub struct BindGroupLayoutDescriptor<'a> { pub entries: Cow<'a, [wgt::BindGroupLayoutEntry]>, } -pub type BindGroupLayouts = crate::storage::Storage>; - /// Bind group layout. #[derive(Debug)] pub struct BindGroupLayout { diff --git a/third_party/rust/wgpu-core/src/command/allocator.rs b/third_party/rust/wgpu-core/src/command/allocator.rs new file mode 100644 index 0000000000..e17fd08d76 --- /dev/null +++ b/third_party/rust/wgpu-core/src/command/allocator.rs @@ -0,0 +1,67 @@ +use crate::hal_api::HalApi; +use crate::resource_log; +use hal::Device as _; + +use crate::lock::{rank, Mutex}; + +/// A pool of free [`wgpu_hal::CommandEncoder`]s, owned by a `Device`. +/// +/// Each encoder in this list is in the "closed" state. 
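// Editor's sketch (not part of the patch): the `CommandAllocator` introduced above
// is a recycling pool of "closed" hal command encoders guarded by a mutex. The
// standalone types below (`Encoder`, `EncoderPool`) are hypothetical stand-ins that
// show the same acquire/release pattern with plain `std` types.
use std::sync::Mutex;

struct Encoder; // stand-in for a hal command encoder in the "closed" state

struct EncoderPool {
    free: Mutex<Vec<Encoder>>,
}

impl EncoderPool {
    fn new() -> Self {
        Self { free: Mutex::new(Vec::new()) }
    }

    /// Reuse a pooled encoder if one is available, otherwise create a fresh one.
    fn acquire(&self, create: impl FnOnce() -> Encoder) -> Encoder {
        self.free.lock().unwrap().pop().unwrap_or_else(create)
    }

    /// Return an encoder to the pool once the work recorded with it has completed.
    fn release(&self, encoder: Encoder) {
        self.free.lock().unwrap().push(encoder);
    }
}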
+/// +/// Since a raw [`CommandEncoder`][ce] is itself a pool for allocating +/// raw [`CommandBuffer`][cb]s, this is a pool of pools. +/// +/// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder +/// [ce]: hal::CommandEncoder +/// [cb]: hal::Api::CommandBuffer +pub(crate) struct CommandAllocator { + free_encoders: Mutex>, +} + +impl CommandAllocator { + pub(crate) fn new() -> Self { + Self { + free_encoders: Mutex::new(rank::COMMAND_ALLOCATOR_FREE_ENCODERS, Vec::new()), + } + } + + /// Return a fresh [`wgpu_hal::CommandEncoder`] in the "closed" state. + /// + /// If we have free encoders in the pool, take one of those. Otherwise, + /// create a new one on `device`. + /// + /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder + pub(crate) fn acquire_encoder( + &self, + device: &A::Device, + queue: &A::Queue, + ) -> Result { + let mut free_encoders = self.free_encoders.lock(); + match free_encoders.pop() { + Some(encoder) => Ok(encoder), + None => unsafe { + let hal_desc = hal::CommandEncoderDescriptor { label: None, queue }; + device.create_command_encoder(&hal_desc) + }, + } + } + + /// Add `encoder` back to the free pool. + pub(crate) fn release_encoder(&self, encoder: A::CommandEncoder) { + let mut free_encoders = self.free_encoders.lock(); + free_encoders.push(encoder); + } + + /// Free the pool of command encoders. + /// + /// This is only called when the `Device` is dropped. + pub(crate) fn dispose(&self, device: &A::Device) { + let mut free_encoders = self.free_encoders.lock(); + resource_log!("CommandAllocator::dispose encoders {}", free_encoders.len()); + for cmd_encoder in free_encoders.drain(..) { + unsafe { + device.destroy_command_encoder(cmd_encoder); + } + } + } +} diff --git a/third_party/rust/wgpu-core/src/command/bundle.rs b/third_party/rust/wgpu-core/src/command/bundle.rs index 47beda8ec6..d9d821c533 100644 --- a/third_party/rust/wgpu-core/src/command/bundle.rs +++ b/third_party/rust/wgpu-core/src/command/bundle.rs @@ -73,7 +73,7 @@ index format changes. 
[Gdcrbe]: crate::global::Global::device_create_render_bundle_encoder [Grbef]: crate::global::Global::render_bundle_encoder_finish -[wrpeb]: crate::command::render_ffi::wgpu_render_pass_execute_bundles +[wrpeb]: crate::command::render::render_commands::wgpu_render_pass_execute_bundles !*/ #![allow(clippy::reversed_empty_ranges)] @@ -113,7 +113,7 @@ use hal::CommandEncoder as _; use super::ArcRenderCommand; -/// https://gpuweb.github.io/gpuweb/#dom-gpurendercommandsmixin-draw +/// fn validate_draw( vertex: &[Option>], step: &[VertexStep], @@ -1548,15 +1548,14 @@ pub mod bundle_ffi { offsets: *const DynamicOffset, offset_length: usize, ) { - let redundant = unsafe { - bundle.current_bind_groups.set_and_check_redundant( - bind_group_id, - index, - &mut bundle.base.dynamic_offsets, - offsets, - offset_length, - ) - }; + let offsets = unsafe { slice::from_raw_parts(offsets, offset_length) }; + + let redundant = bundle.current_bind_groups.set_and_check_redundant( + bind_group_id, + index, + &mut bundle.base.dynamic_offsets, + offsets, + ); if redundant { return; diff --git a/third_party/rust/wgpu-core/src/command/clear.rs b/third_party/rust/wgpu-core/src/command/clear.rs index 72c923f82e..faff177928 100644 --- a/third_party/rust/wgpu-core/src/command/clear.rs +++ b/third_party/rust/wgpu-core/src/command/clear.rs @@ -104,6 +104,11 @@ impl Global { let dst_buffer = buffer_guard .get(dst) .map_err(|_| ClearError::InvalidBuffer(dst))?; + + if dst_buffer.device.as_info().id() != cmd_buf.device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + cmd_buf_data .trackers .buffers @@ -200,6 +205,10 @@ impl Global { .get(dst) .map_err(|_| ClearError::InvalidTexture(dst))?; + if dst_texture.device.as_info().id() != cmd_buf.device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + // Check if subresource aspects are valid. let clear_aspects = hal::FormatAspects::new(dst_texture.desc.format, subresource_range.aspect); diff --git a/third_party/rust/wgpu-core/src/command/compute.rs b/third_party/rust/wgpu-core/src/command/compute.rs index b38324984c..4ee48f0086 100644 --- a/third_party/rust/wgpu-core/src/command/compute.rs +++ b/third_party/rust/wgpu-core/src/command/compute.rs @@ -1,3 +1,4 @@ +use crate::command::compute_command::{ArcComputeCommand, ComputeCommand}; use crate::device::DeviceError; use crate::resource::Resource; use crate::snatch::SnatchGuard; @@ -20,7 +21,6 @@ use crate::{ hal_label, id, id::DeviceId, init_tracker::MemoryInitKind, - pipeline, resource::{self}, storage::Storage, track::{Tracker, UsageConflict, UsageScope}, @@ -36,61 +36,9 @@ use serde::Serialize; use thiserror::Error; +use std::sync::Arc; use std::{fmt, mem, str}; -#[doc(hidden)] -#[derive(Clone, Copy, Debug)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub enum ComputeCommand { - SetBindGroup { - index: u32, - num_dynamic_offsets: usize, - bind_group_id: id::BindGroupId, - }, - SetPipeline(id::ComputePipelineId), - - /// Set a range of push constants to values stored in [`BasePass::push_constant_data`]. - SetPushConstant { - /// The byte offset within the push constant storage to write to. This - /// must be a multiple of four. - offset: u32, - - /// The number of bytes to write. This must be a multiple of four. - size_bytes: u32, - - /// Index in [`BasePass::push_constant_data`] of the start of the data - /// to be written. - /// - /// Note: this is not a byte offset like `offset`. 
Rather, it is the - /// index of the first `u32` element in `push_constant_data` to read. - values_offset: u32, - }, - - Dispatch([u32; 3]), - DispatchIndirect { - buffer_id: id::BufferId, - offset: wgt::BufferAddress, - }, - PushDebugGroup { - color: u32, - len: usize, - }, - PopDebugGroup, - InsertDebugMarker { - color: u32, - len: usize, - }, - WriteTimestamp { - query_set_id: id::QuerySetId, - query_index: u32, - }, - BeginPipelineStatisticsQuery { - query_set_id: id::QuerySetId, - query_index: u32, - }, - EndPipelineStatisticsQuery, -} - #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] pub struct ComputePass { base: BasePass, @@ -184,7 +132,7 @@ pub enum ComputePassErrorInner { #[error(transparent)] Encoder(#[from] CommandEncoderError), #[error("Bind group at index {0:?} is invalid")] - InvalidBindGroup(usize), + InvalidBindGroup(u32), #[error("Device {0:?} is invalid")] InvalidDevice(DeviceId), #[error("Bind group index {index} is greater than the device's requested `max_bind_group` limit {max}")] @@ -249,7 +197,7 @@ impl PrettyError for ComputePassErrorInner { pub struct ComputePassError { pub scope: PassErrorScope, #[source] - inner: ComputePassErrorInner, + pub(super) inner: ComputePassErrorInner, } impl PrettyError for ComputePassError { fn fmt_pretty(&self, fmt: &mut ErrorFormatter) { @@ -346,7 +294,8 @@ impl Global { encoder_id: id::CommandEncoderId, pass: &ComputePass, ) -> Result<(), ComputePassError> { - self.command_encoder_run_compute_pass_impl::( + // TODO: This should go directly to `command_encoder_run_compute_pass_impl` by means of storing `ArcComputeCommand` internally. + self.command_encoder_run_compute_pass_with_unresolved_commands::( encoder_id, pass.base.as_ref(), pass.timestamp_writes.as_ref(), @@ -354,18 +303,41 @@ impl Global { } #[doc(hidden)] - pub fn command_encoder_run_compute_pass_impl( + pub fn command_encoder_run_compute_pass_with_unresolved_commands( &self, encoder_id: id::CommandEncoderId, base: BasePassRef, timestamp_writes: Option<&ComputePassTimestampWrites>, + ) -> Result<(), ComputePassError> { + let resolved_commands = + ComputeCommand::resolve_compute_command_ids(A::hub(self), base.commands)?; + + self.command_encoder_run_compute_pass_impl::( + encoder_id, + BasePassRef { + label: base.label, + commands: &resolved_commands, + dynamic_offsets: base.dynamic_offsets, + string_data: base.string_data, + push_constant_data: base.push_constant_data, + }, + timestamp_writes, + ) + } + + fn command_encoder_run_compute_pass_impl( + &self, + encoder_id: id::CommandEncoderId, + base: BasePassRef>, + timestamp_writes: Option<&ComputePassTimestampWrites>, ) -> Result<(), ComputePassError> { profiling::scope!("CommandEncoder::run_compute_pass"); let pass_scope = PassErrorScope::Pass(encoder_id); let hub = A::hub(self); - let cmd_buf = CommandBuffer::get_encoder(hub, encoder_id).map_pass_err(pass_scope)?; + let cmd_buf: Arc> = + CommandBuffer::get_encoder(hub, encoder_id).map_pass_err(pass_scope)?; let device = &cmd_buf.device; if !device.is_valid() { return Err(ComputePassErrorInner::InvalidDevice( @@ -380,7 +352,13 @@ impl Global { #[cfg(feature = "trace")] if let Some(ref mut list) = cmd_buf_data.commands { list.push(crate::device::trace::Command::RunComputePass { - base: BasePass::from_ref(base), + base: BasePass { + label: base.label.map(str::to_string), + commands: base.commands.iter().map(Into::into).collect(), + dynamic_offsets: base.dynamic_offsets.to_vec(), + string_data: base.string_data.to_vec(), + push_constant_data: 
base.push_constant_data.to_vec(), + }, timestamp_writes: timestamp_writes.cloned(), }); } @@ -400,9 +378,7 @@ impl Global { let raw = encoder.open().map_pass_err(pass_scope)?; let bind_group_guard = hub.bind_groups.read(); - let pipeline_guard = hub.compute_pipelines.read(); let query_set_guard = hub.query_sets.read(); - let buffer_guard = hub.buffers.read(); let mut state = State { binder: Binder::new(), @@ -480,19 +456,21 @@ impl Global { // be inserted before texture reads. let mut pending_discard_init_fixups = SurfacesInDiscardState::new(); + // TODO: We should be draining the commands here, avoiding extra copies in the process. + // (A command encoder can't be executed twice!) for command in base.commands { - match *command { - ComputeCommand::SetBindGroup { + match command { + ArcComputeCommand::SetBindGroup { index, num_dynamic_offsets, - bind_group_id, + bind_group, } => { - let scope = PassErrorScope::SetBindGroup(bind_group_id); + let scope = PassErrorScope::SetBindGroup(bind_group.as_info().id()); let max_bind_groups = cmd_buf.limits.max_bind_groups; - if index >= max_bind_groups { + if index >= &max_bind_groups { return Err(ComputePassErrorInner::BindGroupIndexOutOfRange { - index, + index: *index, max: max_bind_groups, }) .map_pass_err(scope); @@ -505,13 +483,9 @@ impl Global { ); dynamic_offset_count += num_dynamic_offsets; - let bind_group = tracker - .bind_groups - .add_single(&*bind_group_guard, bind_group_id) - .ok_or(ComputePassErrorInner::InvalidBindGroup(index as usize)) - .map_pass_err(scope)?; + let bind_group = tracker.bind_groups.insert_single(bind_group.clone()); bind_group - .validate_dynamic_bindings(index, &temp_offsets, &cmd_buf.limits) + .validate_dynamic_bindings(*index, &temp_offsets, &cmd_buf.limits) .map_pass_err(scope)?; buffer_memory_init_actions.extend( @@ -533,14 +507,14 @@ impl Global { let entries = state .binder - .assign_group(index as usize, bind_group, &temp_offsets); + .assign_group(*index as usize, bind_group, &temp_offsets); if !entries.is_empty() && pipeline_layout.is_some() { let pipeline_layout = pipeline_layout.as_ref().unwrap().raw(); for (i, e) in entries.iter().enumerate() { if let Some(group) = e.group.as_ref() { let raw_bg = group .raw(&snatch_guard) - .ok_or(ComputePassErrorInner::InvalidBindGroup(i)) + .ok_or(ComputePassErrorInner::InvalidBindGroup(i as u32)) .map_pass_err(scope)?; unsafe { raw.set_bind_group( @@ -554,16 +528,13 @@ impl Global { } } } - ComputeCommand::SetPipeline(pipeline_id) => { + ArcComputeCommand::SetPipeline(pipeline) => { + let pipeline_id = pipeline.as_info().id(); let scope = PassErrorScope::SetPipelineCompute(pipeline_id); state.pipeline = Some(pipeline_id); - let pipeline: &pipeline::ComputePipeline = tracker - .compute_pipelines - .add_single(&*pipeline_guard, pipeline_id) - .ok_or(ComputePassErrorInner::InvalidPipeline(pipeline_id)) - .map_pass_err(scope)?; + tracker.compute_pipelines.insert_single(pipeline.clone()); unsafe { raw.set_compute_pipeline(pipeline.raw()); @@ -587,7 +558,7 @@ impl Global { if let Some(group) = e.group.as_ref() { let raw_bg = group .raw(&snatch_guard) - .ok_or(ComputePassErrorInner::InvalidBindGroup(i)) + .ok_or(ComputePassErrorInner::InvalidBindGroup(i as u32)) .map_pass_err(scope)?; unsafe { raw.set_bind_group( @@ -623,7 +594,7 @@ impl Global { } } } - ComputeCommand::SetPushConstant { + ArcComputeCommand::SetPushConstant { offset, size_bytes, values_offset, @@ -634,7 +605,7 @@ impl Global { let values_end_offset = (values_offset + size_bytes / wgt::PUSH_CONSTANT_ALIGNMENT) 
as usize; let data_slice = - &base.push_constant_data[(values_offset as usize)..values_end_offset]; + &base.push_constant_data[(*values_offset as usize)..values_end_offset]; let pipeline_layout = state .binder @@ -649,7 +620,7 @@ impl Global { pipeline_layout .validate_push_constant_ranges( wgt::ShaderStages::COMPUTE, - offset, + *offset, end_offset_bytes, ) .map_pass_err(scope)?; @@ -658,12 +629,12 @@ impl Global { raw.set_push_constants( pipeline_layout.raw(), wgt::ShaderStages::COMPUTE, - offset, + *offset, data_slice, ); } } - ComputeCommand::Dispatch(groups) => { + ArcComputeCommand::Dispatch(groups) => { let scope = PassErrorScope::Dispatch { indirect: false, pipeline: state.pipeline, @@ -688,7 +659,7 @@ impl Global { { return Err(ComputePassErrorInner::Dispatch( DispatchError::InvalidGroupSize { - current: groups, + current: *groups, limit: groups_size_limit, }, )) @@ -696,10 +667,11 @@ impl Global { } unsafe { - raw.dispatch(groups); + raw.dispatch(*groups); } } - ComputeCommand::DispatchIndirect { buffer_id, offset } => { + ArcComputeCommand::DispatchIndirect { buffer, offset } => { + let buffer_id = buffer.as_info().id(); let scope = PassErrorScope::Dispatch { indirect: true, pipeline: state.pipeline, @@ -711,29 +683,25 @@ impl Global { .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION) .map_pass_err(scope)?; - let indirect_buffer = state + state .scope .buffers - .merge_single(&*buffer_guard, buffer_id, hal::BufferUses::INDIRECT) + .insert_merge_single(buffer.clone(), hal::BufferUses::INDIRECT) + .map_pass_err(scope)?; + check_buffer_usage(buffer_id, buffer.usage, wgt::BufferUsages::INDIRECT) .map_pass_err(scope)?; - check_buffer_usage( - buffer_id, - indirect_buffer.usage, - wgt::BufferUsages::INDIRECT, - ) - .map_pass_err(scope)?; let end_offset = offset + mem::size_of::() as u64; - if end_offset > indirect_buffer.size { + if end_offset > buffer.size { return Err(ComputePassErrorInner::IndirectBufferOverrun { - offset, + offset: *offset, end_offset, - buffer_size: indirect_buffer.size, + buffer_size: buffer.size, }) .map_pass_err(scope); } - let buf_raw = indirect_buffer + let buf_raw = buffer .raw .get(&snatch_guard) .ok_or(ComputePassErrorInner::InvalidIndirectBuffer(buffer_id)) @@ -742,9 +710,9 @@ impl Global { let stride = 3 * 4; // 3 integers, x/y/z group size buffer_memory_init_actions.extend( - indirect_buffer.initialization_status.read().create_action( - indirect_buffer, - offset..(offset + stride), + buffer.initialization_status.read().create_action( + buffer, + *offset..(*offset + stride), MemoryInitKind::NeedsInitializedMemory, ), ); @@ -754,15 +722,15 @@ impl Global { raw, &mut intermediate_trackers, &*bind_group_guard, - Some(indirect_buffer.as_info().tracker_index()), + Some(buffer.as_info().tracker_index()), &snatch_guard, ) .map_pass_err(scope)?; unsafe { - raw.dispatch_indirect(buf_raw, offset); + raw.dispatch_indirect(buf_raw, *offset); } } - ComputeCommand::PushDebugGroup { color: _, len } => { + ArcComputeCommand::PushDebugGroup { color: _, len } => { state.debug_scope_depth += 1; if !discard_hal_labels { let label = @@ -774,7 +742,7 @@ impl Global { } string_offset += len; } - ComputeCommand::PopDebugGroup => { + ArcComputeCommand::PopDebugGroup => { let scope = PassErrorScope::PopDebugGroup; if state.debug_scope_depth == 0 { @@ -788,7 +756,7 @@ impl Global { } } } - ComputeCommand::InsertDebugMarker { color: _, len } => { + ArcComputeCommand::InsertDebugMarker { color: _, len } => { if !discard_hal_labels { let label = 
str::from_utf8(&base.string_data[string_offset..string_offset + len]) @@ -797,49 +765,43 @@ impl Global { } string_offset += len; } - ComputeCommand::WriteTimestamp { - query_set_id, + ArcComputeCommand::WriteTimestamp { + query_set, query_index, } => { + let query_set_id = query_set.as_info().id(); let scope = PassErrorScope::WriteTimestamp; device .require_features(wgt::Features::TIMESTAMP_QUERY_INSIDE_PASSES) .map_pass_err(scope)?; - let query_set: &resource::QuerySet = tracker - .query_sets - .add_single(&*query_set_guard, query_set_id) - .ok_or(ComputePassErrorInner::InvalidQuerySet(query_set_id)) - .map_pass_err(scope)?; + let query_set = tracker.query_sets.insert_single(query_set.clone()); query_set - .validate_and_write_timestamp(raw, query_set_id, query_index, None) + .validate_and_write_timestamp(raw, query_set_id, *query_index, None) .map_pass_err(scope)?; } - ComputeCommand::BeginPipelineStatisticsQuery { - query_set_id, + ArcComputeCommand::BeginPipelineStatisticsQuery { + query_set, query_index, } => { + let query_set_id = query_set.as_info().id(); let scope = PassErrorScope::BeginPipelineStatisticsQuery; - let query_set: &resource::QuerySet = tracker - .query_sets - .add_single(&*query_set_guard, query_set_id) - .ok_or(ComputePassErrorInner::InvalidQuerySet(query_set_id)) - .map_pass_err(scope)?; + let query_set = tracker.query_sets.insert_single(query_set.clone()); query_set .validate_and_begin_pipeline_statistics_query( raw, query_set_id, - query_index, + *query_index, None, &mut active_query, ) .map_pass_err(scope)?; } - ComputeCommand::EndPipelineStatisticsQuery => { + ArcComputeCommand::EndPipelineStatisticsQuery => { let scope = PassErrorScope::EndPipelineStatisticsQuery; end_pipeline_statistics_query(raw, &*query_set_guard, &mut active_query) @@ -883,33 +845,24 @@ impl Global { } } -pub mod compute_ffi { +pub mod compute_commands { use super::{ComputeCommand, ComputePass}; - use crate::{id, RawString}; - use std::{convert::TryInto, ffi, slice}; + use crate::id; + use std::convert::TryInto; use wgt::{BufferAddress, DynamicOffset}; - /// # Safety - /// - /// This function is unsafe as there is no guarantee that the given pointer is - /// valid for `offset_length` elements. - #[no_mangle] - pub unsafe extern "C" fn wgpu_compute_pass_set_bind_group( + pub fn wgpu_compute_pass_set_bind_group( pass: &mut ComputePass, index: u32, bind_group_id: id::BindGroupId, - offsets: *const DynamicOffset, - offset_length: usize, + offsets: &[DynamicOffset], ) { - let redundant = unsafe { - pass.current_bind_groups.set_and_check_redundant( - bind_group_id, - index, - &mut pass.base.dynamic_offsets, - offsets, - offset_length, - ) - }; + let redundant = pass.current_bind_groups.set_and_check_redundant( + bind_group_id, + index, + &mut pass.base.dynamic_offsets, + offsets, + ); if redundant { return; @@ -917,13 +870,12 @@ pub mod compute_ffi { pass.base.commands.push(ComputeCommand::SetBindGroup { index, - num_dynamic_offsets: offset_length, + num_dynamic_offsets: offsets.len(), bind_group_id, }); } - #[no_mangle] - pub extern "C" fn wgpu_compute_pass_set_pipeline( + pub fn wgpu_compute_pass_set_pipeline( pass: &mut ComputePass, pipeline_id: id::ComputePipelineId, ) { @@ -936,47 +888,34 @@ pub mod compute_ffi { .push(ComputeCommand::SetPipeline(pipeline_id)); } - /// # Safety - /// - /// This function is unsafe as there is no guarantee that the given pointer is - /// valid for `size_bytes` bytes. 
- #[no_mangle] - pub unsafe extern "C" fn wgpu_compute_pass_set_push_constant( - pass: &mut ComputePass, - offset: u32, - size_bytes: u32, - data: *const u8, - ) { + pub fn wgpu_compute_pass_set_push_constant(pass: &mut ComputePass, offset: u32, data: &[u8]) { assert_eq!( offset & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), 0, "Push constant offset must be aligned to 4 bytes." ); assert_eq!( - size_bytes & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), + data.len() as u32 & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), 0, "Push constant size must be aligned to 4 bytes." ); - let data_slice = unsafe { slice::from_raw_parts(data, size_bytes as usize) }; let value_offset = pass.base.push_constant_data.len().try_into().expect( "Ran out of push constant space. Don't set 4gb of push constants per ComputePass.", ); pass.base.push_constant_data.extend( - data_slice - .chunks_exact(wgt::PUSH_CONSTANT_ALIGNMENT as usize) + data.chunks_exact(wgt::PUSH_CONSTANT_ALIGNMENT as usize) .map(|arr| u32::from_ne_bytes([arr[0], arr[1], arr[2], arr[3]])), ); pass.base.commands.push(ComputeCommand::SetPushConstant { offset, - size_bytes, + size_bytes: data.len() as u32, values_offset: value_offset, }); } - #[no_mangle] - pub extern "C" fn wgpu_compute_pass_dispatch_workgroups( + pub fn wgpu_compute_pass_dispatch_workgroups( pass: &mut ComputePass, groups_x: u32, groups_y: u32, @@ -987,8 +926,7 @@ pub mod compute_ffi { .push(ComputeCommand::Dispatch([groups_x, groups_y, groups_z])); } - #[no_mangle] - pub extern "C" fn wgpu_compute_pass_dispatch_workgroups_indirect( + pub fn wgpu_compute_pass_dispatch_workgroups_indirect( pass: &mut ComputePass, buffer_id: id::BufferId, offset: BufferAddress, @@ -998,17 +936,8 @@ pub mod compute_ffi { .push(ComputeCommand::DispatchIndirect { buffer_id, offset }); } - /// # Safety - /// - /// This function is unsafe as there is no guarantee that the given `label` - /// is a valid null-terminated string. - #[no_mangle] - pub unsafe extern "C" fn wgpu_compute_pass_push_debug_group( - pass: &mut ComputePass, - label: RawString, - color: u32, - ) { - let bytes = unsafe { ffi::CStr::from_ptr(label) }.to_bytes(); + pub fn wgpu_compute_pass_push_debug_group(pass: &mut ComputePass, label: &str, color: u32) { + let bytes = label.as_bytes(); pass.base.string_data.extend_from_slice(bytes); pass.base.commands.push(ComputeCommand::PushDebugGroup { @@ -1017,22 +946,12 @@ pub mod compute_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_compute_pass_pop_debug_group(pass: &mut ComputePass) { + pub fn wgpu_compute_pass_pop_debug_group(pass: &mut ComputePass) { pass.base.commands.push(ComputeCommand::PopDebugGroup); } - /// # Safety - /// - /// This function is unsafe as there is no guarantee that the given `label` - /// is a valid null-terminated string. 
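// Editor's sketch (not part of the patch): how the safe `wgpu_compute_pass_set_push_constant`
// above packs its `&[u8]` argument into the pass's shared `Vec<u32>` of push constant data.
// `offset` remains a *byte* offset into push constant storage; the recorded `values_offset`
// is the index of the first packed `u32`, and `size_bytes` is the byte count. Names below
// are local to this illustration.
fn pack_push_constants(push_constant_data: &mut Vec<u32>, data: &[u8]) -> (u32, u32) {
    assert_eq!(data.len() % 4, 0, "push constant data must be a multiple of 4 bytes");
    let values_offset = push_constant_data.len() as u32; // u32 index, not a byte offset
    push_constant_data.extend(
        data.chunks_exact(4)
            .map(|arr| u32::from_ne_bytes([arr[0], arr[1], arr[2], arr[3]])),
    );
    (values_offset, data.len() as u32)
}

#[test]
fn packs_one_word() {
    let mut storage = Vec::new();
    let (values_offset, size_bytes) = pack_push_constants(&mut storage, &42u32.to_ne_bytes());
    assert_eq!((values_offset, size_bytes, storage[0]), (0, 4, 42));
}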
- #[no_mangle] - pub unsafe extern "C" fn wgpu_compute_pass_insert_debug_marker( - pass: &mut ComputePass, - label: RawString, - color: u32, - ) { - let bytes = unsafe { ffi::CStr::from_ptr(label) }.to_bytes(); + pub fn wgpu_compute_pass_insert_debug_marker(pass: &mut ComputePass, label: &str, color: u32) { + let bytes = label.as_bytes(); pass.base.string_data.extend_from_slice(bytes); pass.base.commands.push(ComputeCommand::InsertDebugMarker { @@ -1041,8 +960,7 @@ pub mod compute_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_compute_pass_write_timestamp( + pub fn wgpu_compute_pass_write_timestamp( pass: &mut ComputePass, query_set_id: id::QuerySetId, query_index: u32, @@ -1053,8 +971,7 @@ pub mod compute_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_compute_pass_begin_pipeline_statistics_query( + pub fn wgpu_compute_pass_begin_pipeline_statistics_query( pass: &mut ComputePass, query_set_id: id::QuerySetId, query_index: u32, @@ -1067,8 +984,7 @@ pub mod compute_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_compute_pass_end_pipeline_statistics_query(pass: &mut ComputePass) { + pub fn wgpu_compute_pass_end_pipeline_statistics_query(pass: &mut ComputePass) { pass.base .commands .push(ComputeCommand::EndPipelineStatisticsQuery); diff --git a/third_party/rust/wgpu-core/src/command/compute_command.rs b/third_party/rust/wgpu-core/src/command/compute_command.rs new file mode 100644 index 0000000000..49fdbbec24 --- /dev/null +++ b/third_party/rust/wgpu-core/src/command/compute_command.rs @@ -0,0 +1,322 @@ +use std::sync::Arc; + +use crate::{ + binding_model::BindGroup, + hal_api::HalApi, + id, + pipeline::ComputePipeline, + resource::{Buffer, QuerySet}, +}; + +use super::{ComputePassError, ComputePassErrorInner, PassErrorScope}; + +#[derive(Clone, Copy, Debug)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum ComputeCommand { + SetBindGroup { + index: u32, + num_dynamic_offsets: usize, + bind_group_id: id::BindGroupId, + }, + + SetPipeline(id::ComputePipelineId), + + /// Set a range of push constants to values stored in `push_constant_data`. + SetPushConstant { + /// The byte offset within the push constant storage to write to. This + /// must be a multiple of four. + offset: u32, + + /// The number of bytes to write. This must be a multiple of four. + size_bytes: u32, + + /// Index in `push_constant_data` of the start of the data + /// to be written. + /// + /// Note: this is not a byte offset like `offset`. Rather, it is the + /// index of the first `u32` element in `push_constant_data` to read. + values_offset: u32, + }, + + Dispatch([u32; 3]), + + DispatchIndirect { + buffer_id: id::BufferId, + offset: wgt::BufferAddress, + }, + + PushDebugGroup { + color: u32, + len: usize, + }, + + PopDebugGroup, + + InsertDebugMarker { + color: u32, + len: usize, + }, + + WriteTimestamp { + query_set_id: id::QuerySetId, + query_index: u32, + }, + + BeginPipelineStatisticsQuery { + query_set_id: id::QuerySetId, + query_index: u32, + }, + + EndPipelineStatisticsQuery, +} + +impl ComputeCommand { + /// Resolves all ids in a list of commands into the corresponding resource Arc. 
+ /// + // TODO: Once resolving is done on-the-fly during recording, this function should be only needed with the replay feature: + // #[cfg(feature = "replay")] + pub fn resolve_compute_command_ids( + hub: &crate::hub::Hub, + commands: &[ComputeCommand], + ) -> Result>, ComputePassError> { + let buffers_guard = hub.buffers.read(); + let bind_group_guard = hub.bind_groups.read(); + let query_set_guard = hub.query_sets.read(); + let pipelines_guard = hub.compute_pipelines.read(); + + let resolved_commands: Vec> = commands + .iter() + .map(|c| -> Result, ComputePassError> { + Ok(match *c { + ComputeCommand::SetBindGroup { + index, + num_dynamic_offsets, + bind_group_id, + } => ArcComputeCommand::SetBindGroup { + index, + num_dynamic_offsets, + bind_group: bind_group_guard.get_owned(bind_group_id).map_err(|_| { + ComputePassError { + scope: PassErrorScope::SetBindGroup(bind_group_id), + inner: ComputePassErrorInner::InvalidBindGroup(index), + } + })?, + }, + + ComputeCommand::SetPipeline(pipeline_id) => ArcComputeCommand::SetPipeline( + pipelines_guard + .get_owned(pipeline_id) + .map_err(|_| ComputePassError { + scope: PassErrorScope::SetPipelineCompute(pipeline_id), + inner: ComputePassErrorInner::InvalidPipeline(pipeline_id), + })?, + ), + + ComputeCommand::SetPushConstant { + offset, + size_bytes, + values_offset, + } => ArcComputeCommand::SetPushConstant { + offset, + size_bytes, + values_offset, + }, + + ComputeCommand::Dispatch(dim) => ArcComputeCommand::Dispatch(dim), + + ComputeCommand::DispatchIndirect { buffer_id, offset } => { + ArcComputeCommand::DispatchIndirect { + buffer: buffers_guard.get_owned(buffer_id).map_err(|_| { + ComputePassError { + scope: PassErrorScope::Dispatch { + indirect: true, + pipeline: None, // TODO: not used right now, but once we do the resolve during recording we can use this again. + }, + inner: ComputePassErrorInner::InvalidBuffer(buffer_id), + } + })?, + offset, + } + } + + ComputeCommand::PushDebugGroup { color, len } => { + ArcComputeCommand::PushDebugGroup { color, len } + } + + ComputeCommand::PopDebugGroup => ArcComputeCommand::PopDebugGroup, + + ComputeCommand::InsertDebugMarker { color, len } => { + ArcComputeCommand::InsertDebugMarker { color, len } + } + + ComputeCommand::WriteTimestamp { + query_set_id, + query_index, + } => ArcComputeCommand::WriteTimestamp { + query_set: query_set_guard.get_owned(query_set_id).map_err(|_| { + ComputePassError { + scope: PassErrorScope::WriteTimestamp, + inner: ComputePassErrorInner::InvalidQuerySet(query_set_id), + } + })?, + query_index, + }, + + ComputeCommand::BeginPipelineStatisticsQuery { + query_set_id, + query_index, + } => ArcComputeCommand::BeginPipelineStatisticsQuery { + query_set: query_set_guard.get_owned(query_set_id).map_err(|_| { + ComputePassError { + scope: PassErrorScope::BeginPipelineStatisticsQuery, + inner: ComputePassErrorInner::InvalidQuerySet(query_set_id), + } + })?, + query_index, + }, + + ComputeCommand::EndPipelineStatisticsQuery => { + ArcComputeCommand::EndPipelineStatisticsQuery + } + }) + }) + .collect::, ComputePassError>>()?; + Ok(resolved_commands) + } +} + +/// Equivalent to `ComputeCommand` but the Ids resolved into resource Arcs. +#[derive(Clone, Debug)] +pub enum ArcComputeCommand { + SetBindGroup { + index: u32, + num_dynamic_offsets: usize, + bind_group: Arc>, + }, + + SetPipeline(Arc>), + + /// Set a range of push constants to values stored in `push_constant_data`. + SetPushConstant { + /// The byte offset within the push constant storage to write to. 
This + /// must be a multiple of four. + offset: u32, + + /// The number of bytes to write. This must be a multiple of four. + size_bytes: u32, + + /// Index in `push_constant_data` of the start of the data + /// to be written. + /// + /// Note: this is not a byte offset like `offset`. Rather, it is the + /// index of the first `u32` element in `push_constant_data` to read. + values_offset: u32, + }, + + Dispatch([u32; 3]), + + DispatchIndirect { + buffer: Arc>, + offset: wgt::BufferAddress, + }, + + PushDebugGroup { + color: u32, + len: usize, + }, + + PopDebugGroup, + + InsertDebugMarker { + color: u32, + len: usize, + }, + + WriteTimestamp { + query_set: Arc>, + query_index: u32, + }, + + BeginPipelineStatisticsQuery { + query_set: Arc>, + query_index: u32, + }, + + EndPipelineStatisticsQuery, +} + +#[cfg(feature = "trace")] +impl From<&ArcComputeCommand> for ComputeCommand { + fn from(value: &ArcComputeCommand) -> Self { + use crate::resource::Resource as _; + + match value { + ArcComputeCommand::SetBindGroup { + index, + num_dynamic_offsets, + bind_group, + } => ComputeCommand::SetBindGroup { + index: *index, + num_dynamic_offsets: *num_dynamic_offsets, + bind_group_id: bind_group.as_info().id(), + }, + + ArcComputeCommand::SetPipeline(pipeline) => { + ComputeCommand::SetPipeline(pipeline.as_info().id()) + } + + ArcComputeCommand::SetPushConstant { + offset, + size_bytes, + values_offset, + } => ComputeCommand::SetPushConstant { + offset: *offset, + size_bytes: *size_bytes, + values_offset: *values_offset, + }, + + ArcComputeCommand::Dispatch(dim) => ComputeCommand::Dispatch(*dim), + + ArcComputeCommand::DispatchIndirect { buffer, offset } => { + ComputeCommand::DispatchIndirect { + buffer_id: buffer.as_info().id(), + offset: *offset, + } + } + + ArcComputeCommand::PushDebugGroup { color, len } => ComputeCommand::PushDebugGroup { + color: *color, + len: *len, + }, + + ArcComputeCommand::PopDebugGroup => ComputeCommand::PopDebugGroup, + + ArcComputeCommand::InsertDebugMarker { color, len } => { + ComputeCommand::InsertDebugMarker { + color: *color, + len: *len, + } + } + + ArcComputeCommand::WriteTimestamp { + query_set, + query_index, + } => ComputeCommand::WriteTimestamp { + query_set_id: query_set.as_info().id(), + query_index: *query_index, + }, + + ArcComputeCommand::BeginPipelineStatisticsQuery { + query_set, + query_index, + } => ComputeCommand::BeginPipelineStatisticsQuery { + query_set_id: query_set.as_info().id(), + query_index: *query_index, + }, + + ArcComputeCommand::EndPipelineStatisticsQuery => { + ComputeCommand::EndPipelineStatisticsQuery + } + } + } +} diff --git a/third_party/rust/wgpu-core/src/command/mod.rs b/third_party/rust/wgpu-core/src/command/mod.rs index febed4fc97..d53f47bf42 100644 --- a/third_party/rust/wgpu-core/src/command/mod.rs +++ b/third_party/rust/wgpu-core/src/command/mod.rs @@ -1,20 +1,23 @@ +mod allocator; mod bind; mod bundle; mod clear; mod compute; +mod compute_command; mod draw; mod memory_init; mod query; mod render; mod transfer; -use std::slice; use std::sync::Arc; pub(crate) use self::clear::clear_texture; pub use self::{ - bundle::*, clear::ClearError, compute::*, draw::*, query::*, render::*, transfer::*, + bundle::*, clear::ClearError, compute::*, compute_command::ComputeCommand, draw::*, query::*, + render::*, transfer::*, }; +pub(crate) use allocator::CommandAllocator; use self::memory_init::CommandBufferTextureMemoryActions; @@ -22,6 +25,7 @@ use crate::device::{Device, DeviceError}; use crate::error::{ErrorFormatter, PrettyError}; 
use crate::hub::Hub; use crate::id::CommandBufferId; +use crate::lock::{rank, Mutex}; use crate::snatch::SnatchGuard; use crate::init_tracker::BufferInitTrackerAction; @@ -30,7 +34,6 @@ use crate::track::{Tracker, UsageScope}; use crate::{api_log, global::Global, hal_api::HalApi, id, resource_log, Label}; use hal::CommandEncoder as _; -use parking_lot::Mutex; use thiserror::Error; #[cfg(feature = "trace")] @@ -38,23 +41,122 @@ use crate::device::trace::Command as TraceCommand; const PUSH_CONSTANT_CLEAR_ARRAY: &[u32] = &[0_u32; 64]; +/// The current state of a [`CommandBuffer`]. #[derive(Debug)] pub(crate) enum CommandEncoderStatus { + /// Ready to record commands. An encoder's initial state. + /// + /// Command building methods like [`command_encoder_clear_buffer`] and + /// [`command_encoder_run_compute_pass`] require the encoder to be in this + /// state. + /// + /// [`command_encoder_clear_buffer`]: Global::command_encoder_clear_buffer + /// [`command_encoder_run_compute_pass`]: Global::command_encoder_run_compute_pass Recording, + + /// Command recording is complete, and the buffer is ready for submission. + /// + /// [`Global::command_encoder_finish`] transitions a + /// `CommandBuffer` from the `Recording` state into this state. + /// + /// [`Global::queue_submit`] drops command buffers unless they are + /// in this state. Finished, + + /// An error occurred while recording a compute or render pass. + /// + /// When a `CommandEncoder` is left in this state, we have also + /// returned an error result from the function that encountered + /// the problem. Future attempts to use the encoder (that is, + /// calls to [`CommandBuffer::get_encoder`]) will also return + /// errors. + /// + /// Calling [`Global::command_encoder_finish`] in this state + /// discards the command buffer under construction. Error, } +/// A raw [`CommandEncoder`][rce], and the raw [`CommandBuffer`][rcb]s built from it. +/// +/// Each wgpu-core [`CommandBuffer`] owns an instance of this type, which is +/// where the commands are actually stored. +/// +/// This holds a `Vec` of raw [`CommandBuffer`][rcb]s, not just one. We are not +/// always able to record commands in the order in which they must ultimately be +/// submitted to the queue, but raw command buffers don't permit inserting new +/// commands into the middle of a recorded stream. However, hal queue submission +/// accepts a series of command buffers at once, so we can simply break the +/// stream up into multiple buffers, and then reorder the buffers. See +/// [`CommandEncoder::close_and_swap`] for a specific example of this. +/// +/// Note that a [`CommandEncoderId`] actually refers to a [`CommandBuffer`]. +/// Methods that take a command encoder id actually look up the command buffer, +/// and then use its encoder. +/// +/// [rce]: hal::Api::CommandEncoder +/// [rcb]: hal::Api::CommandBuffer +/// [`CommandEncoderId`]: crate::id::CommandEncoderId pub(crate) struct CommandEncoder { + /// The underlying `wgpu_hal` [`CommandEncoder`]. + /// + /// Successfully executed command buffers' encoders are saved in a + /// [`CommandAllocator`] for recycling. + /// + /// [`CommandEncoder`]: hal::Api::CommandEncoder + /// [`CommandAllocator`]: crate::command::CommandAllocator raw: A::CommandEncoder, + + /// All the raw command buffers for our owning [`CommandBuffer`], in + /// submission order. + /// + /// These command buffers were all constructed with `raw`. 
The + /// [`wgpu_hal::CommandEncoder`] trait forbids these from outliving `raw`, + /// and requires that we provide all of these when we call + /// [`raw.reset_all()`][CE::ra], so the encoder and its buffers travel + /// together. + /// + /// [CE::ra]: hal::CommandEncoder::reset_all + /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder list: Vec, + + /// True if `raw` is in the "recording" state. + /// + /// See the documentation for [`wgpu_hal::CommandEncoder`] for + /// details on the states `raw` can be in. + /// + /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder is_open: bool, + label: Option, } //TODO: handle errors better impl CommandEncoder { - /// Closes the live encoder + /// Finish the current command buffer, if any, and place it + /// at the second-to-last position in our list. + /// + /// If we have opened this command encoder, finish its current + /// command buffer, and insert it just before the last element in + /// [`self.list`][l]. If this command buffer is closed, do nothing. + /// + /// On return, the underlying hal encoder is closed. + /// + /// What is this for? + /// + /// The `wgpu_hal` contract requires that each render or compute pass's + /// commands be preceded by calls to [`transition_buffers`] and + /// [`transition_textures`], to put the resources the pass operates on in + /// the appropriate state. Unfortunately, we don't know which transitions + /// are needed until we're done recording the pass itself. Rather than + /// iterating over the pass twice, we note the necessary transitions as we + /// record its commands, finish the raw command buffer for the actual pass, + /// record a new raw command buffer for the transitions, and jam that buffer + /// in just before the pass's. This is the function that jams in the + /// transitions' command buffer. + /// + /// [l]: CommandEncoder::list + /// [`transition_buffers`]: hal::CommandEncoder::transition_buffers + /// [`transition_textures`]: hal::CommandEncoder::transition_textures fn close_and_swap(&mut self) -> Result<(), DeviceError> { if self.is_open { self.is_open = false; @@ -65,6 +167,16 @@ impl CommandEncoder { Ok(()) } + /// Finish the current command buffer, if any, and add it to the + /// end of [`self.list`][l]. + /// + /// If we have opened this command encoder, finish its current + /// command buffer, and push it onto the end of [`self.list`][l]. + /// If this command buffer is closed, do nothing. + /// + /// On return, the underlying hal encoder is closed. + /// + /// [l]: CommandEncoder::list fn close(&mut self) -> Result<(), DeviceError> { if self.is_open { self.is_open = false; @@ -75,6 +187,9 @@ impl CommandEncoder { Ok(()) } + /// Discard the command buffer under construction, if any. + /// + /// The underlying hal encoder is closed, if it was recording. pub(crate) fn discard(&mut self) { if self.is_open { self.is_open = false; @@ -82,7 +197,10 @@ impl CommandEncoder { } } - fn open(&mut self) -> Result<&mut A::CommandEncoder, DeviceError> { + /// Begin recording a new command buffer, if we haven't already. + /// + /// The underlying hal encoder is put in the "recording" state. + pub(crate) fn open(&mut self) -> Result<&mut A::CommandEncoder, DeviceError> { if !self.is_open { self.is_open = true; let label = self.label.as_deref(); @@ -92,6 +210,10 @@ impl CommandEncoder { Ok(&mut self.raw) } + /// Begin recording a new command buffer for a render pass, with + /// its own label. + /// + /// The underlying hal encoder is put in the "recording" state. 
fn open_pass(&mut self, label: Option<&str>) -> Result<(), DeviceError> { self.is_open = true; unsafe { self.raw.begin_encoding(label)? }; @@ -100,7 +222,7 @@ impl CommandEncoder { } } -pub struct BakedCommands { +pub(crate) struct BakedCommands { pub(crate) encoder: A::CommandEncoder, pub(crate) list: Vec, pub(crate) trackers: Tracker, @@ -111,12 +233,29 @@ pub struct BakedCommands { pub(crate) struct DestroyedBufferError(pub id::BufferId); pub(crate) struct DestroyedTextureError(pub id::TextureId); +/// The mutable state of a [`CommandBuffer`]. pub struct CommandBufferMutable { + /// The [`wgpu_hal::Api::CommandBuffer`]s we've built so far, and the encoder + /// they belong to. + /// + /// [`wgpu_hal::Api::CommandBuffer`]: hal::Api::CommandBuffer pub(crate) encoder: CommandEncoder, + + /// The current state of this command buffer's encoder. status: CommandEncoderStatus, + + /// All the resources that the commands recorded so far have referred to. pub(crate) trackers: Tracker, + + /// The regions of buffers and textures these commands will read and write. + /// + /// This is used to determine which portions of which + /// buffers/textures we actually need to initialize. If we're + /// definitely going to write to something before we read from it, + /// we don't need to clear its contents. buffer_memory_init_actions: Vec>, texture_memory_actions: CommandBufferTextureMemoryActions, + pub(crate) pending_query_resets: QueryResetMap, #[cfg(feature = "trace")] pub(crate) commands: Option>, @@ -133,11 +272,36 @@ impl CommandBufferMutable { } } +/// A buffer of commands to be submitted to the GPU for execution. +/// +/// Whereas the WebGPU API uses two separate types for command buffers and +/// encoders, this type is a fusion of the two: +/// +/// - During command recording, this holds a [`CommandEncoder`] accepting this +/// buffer's commands. In this state, the [`CommandBuffer`] type behaves like +/// a WebGPU `GPUCommandEncoder`. +/// +/// - Once command recording is finished by calling +/// [`Global::command_encoder_finish`], no further recording is allowed. The +/// internal [`CommandEncoder`] is retained solely as a storage pool for the +/// raw command buffers. In this state, the value behaves like a WebGPU +/// `GPUCommandBuffer`. +/// +/// - Once a command buffer is submitted to the queue, it is removed from the id +/// registry, and its contents are taken to construct a [`BakedCommands`], +/// whose contents eventually become the property of the submission queue. pub struct CommandBuffer { pub(crate) device: Arc>, limits: wgt::Limits, support_clear_texture: bool, pub(crate) info: ResourceInfo>, + + /// The mutable state of this command buffer. + /// + /// This `Option` is populated when the command buffer is first created. + /// When this is submitted, dropped, or destroyed, its contents are + /// extracted into a [`BakedCommands`] by + /// [`CommandBuffer::extract_baked_commands`]. 
pub(crate) data: Mutex>>, } @@ -176,25 +340,28 @@ impl CommandBuffer { .as_str(), None, ), - data: Mutex::new(Some(CommandBufferMutable { - encoder: CommandEncoder { - raw: encoder, - is_open: false, - list: Vec::new(), - label, - }, - status: CommandEncoderStatus::Recording, - trackers: Tracker::new(), - buffer_memory_init_actions: Default::default(), - texture_memory_actions: Default::default(), - pending_query_resets: QueryResetMap::new(), - #[cfg(feature = "trace")] - commands: if enable_tracing { - Some(Vec::new()) - } else { - None - }, - })), + data: Mutex::new( + rank::COMMAND_BUFFER_DATA, + Some(CommandBufferMutable { + encoder: CommandEncoder { + raw: encoder, + is_open: false, + list: Vec::new(), + label, + }, + status: CommandEncoderStatus::Recording, + trackers: Tracker::new(), + buffer_memory_init_actions: Default::default(), + texture_memory_actions: Default::default(), + pending_query_resets: QueryResetMap::new(), + #[cfg(feature = "trace")] + commands: if enable_tracing { + Some(Vec::new()) + } else { + None + }, + }), + ), } } @@ -248,12 +415,18 @@ impl CommandBuffer { } impl CommandBuffer { + /// Return the [`CommandBuffer`] for `id`, for recording new commands. + /// + /// In `wgpu_core`, the [`CommandBuffer`] type serves both as encoder and + /// buffer, which is why this function takes an [`id::CommandEncoderId`] but + /// returns a [`CommandBuffer`]. The returned command buffer must be in the + /// "recording" state. Otherwise, an error is returned. fn get_encoder( hub: &Hub, id: id::CommandEncoderId, ) -> Result, CommandEncoderError> { let storage = hub.command_buffers.read(); - match storage.get(id.transmute()) { + match storage.get(id.into_command_buffer_id()) { Ok(cmd_buf) => match cmd_buf.data.lock().as_ref().unwrap().status { CommandEncoderStatus::Recording => Ok(cmd_buf.clone()), CommandEncoderStatus::Finished => Err(CommandEncoderError::NotRecording), @@ -286,11 +459,9 @@ impl CommandBuffer { } pub(crate) fn from_arc_into_baked(self: Arc) -> BakedCommands { - if let Some(mut command_buffer) = Arc::into_inner(self) { - command_buffer.extract_baked_commands() - } else { - panic!("CommandBuffer cannot be destroyed because is still in use"); - } + let mut command_buffer = Arc::into_inner(self) + .expect("CommandBuffer cannot be destroyed because is still in use"); + command_buffer.extract_baked_commands() } } @@ -418,7 +589,7 @@ impl Global { let hub = A::hub(self); - let error = match hub.command_buffers.get(encoder_id.transmute()) { + let error = match hub.command_buffers.get(encoder_id.into_command_buffer_id()) { Ok(cmd_buf) => { let mut cmd_buf_data = cmd_buf.data.lock(); let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); @@ -444,7 +615,7 @@ impl Global { Err(_) => Some(CommandEncoderError::Invalid), }; - (encoder_id.transmute(), error) + (encoder_id.into_command_buffer_id(), error) } pub fn command_encoder_push_debug_group( @@ -599,16 +770,15 @@ impl BindGroupStateChange { } } - unsafe fn set_and_check_redundant( + fn set_and_check_redundant( &mut self, bind_group_id: id::BindGroupId, index: u32, dynamic_offsets: &mut Vec, - offsets: *const wgt::DynamicOffset, - offset_length: usize, + offsets: &[wgt::DynamicOffset], ) -> bool { // For now never deduplicate bind groups with dynamic offsets. 
- if offset_length == 0 { + if offsets.is_empty() { // If this get returns None, that means we're well over the limit, // so let the call through to get a proper error if let Some(current_bind_group) = self.last_states.get_mut(index as usize) { @@ -624,8 +794,7 @@ impl BindGroupStateChange { if let Some(current_bind_group) = self.last_states.get_mut(index as usize) { current_bind_group.reset(); } - dynamic_offsets - .extend_from_slice(unsafe { slice::from_raw_parts(offsets, offset_length) }); + dynamic_offsets.extend_from_slice(offsets); } false } @@ -700,7 +869,7 @@ impl PrettyError for PassErrorScope { // This error is not in the error chain, only notes are needed match *self { Self::Pass(id) => { - fmt.command_buffer_label(&id.transmute()); + fmt.command_buffer_label(&id.into_command_buffer_id()); } Self::SetBindGroup(id) => { fmt.bind_group_label(&id); diff --git a/third_party/rust/wgpu-core/src/command/query.rs b/third_party/rust/wgpu-core/src/command/query.rs index 89cba6fbf3..fd3360cc00 100644 --- a/third_party/rust/wgpu-core/src/command/query.rs +++ b/third_party/rust/wgpu-core/src/command/query.rs @@ -9,7 +9,7 @@ use crate::{ hal_api::HalApi, id::{self, Id}, init_tracker::MemoryInitKind, - resource::QuerySet, + resource::{QuerySet, Resource}, storage::Storage, Epoch, FastHashMap, Index, }; @@ -429,11 +429,20 @@ impl Global { .add_single(&*query_set_guard, query_set_id) .ok_or(QueryError::InvalidQuerySet(query_set_id))?; + if query_set.device.as_info().id() != cmd_buf.device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + let (dst_buffer, dst_pending) = { let buffer_guard = hub.buffers.read(); let dst_buffer = buffer_guard .get(destination) .map_err(|_| QueryError::InvalidBuffer(destination))?; + + if dst_buffer.device.as_info().id() != cmd_buf.device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + tracker .buffers .set_single(dst_buffer, hal::BufferUses::COPY_DST) diff --git a/third_party/rust/wgpu-core/src/command/render.rs b/third_party/rust/wgpu-core/src/command/render.rs index 7e859e3cc8..87dc9aac16 100644 --- a/third_party/rust/wgpu-core/src/command/render.rs +++ b/third_party/rust/wgpu-core/src/command/render.rs @@ -1343,7 +1343,8 @@ impl Global { let hub = A::hub(self); - let cmd_buf = CommandBuffer::get_encoder(hub, encoder_id).map_pass_err(pass_scope)?; + let cmd_buf: Arc> = + CommandBuffer::get_encoder(hub, encoder_id).map_pass_err(pass_scope)?; let device = &cmd_buf.device; let snatch_guard = device.snatchable_lock.read(); @@ -2409,7 +2410,10 @@ impl Global { (trackers, pending_discard_init_fixups) }; - let cmd_buf = hub.command_buffers.get(encoder_id.transmute()).unwrap(); + let cmd_buf = hub + .command_buffers + .get(encoder_id.into_command_buffer_id()) + .unwrap(); let mut cmd_buf_data = cmd_buf.data.lock(); let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); @@ -2455,36 +2459,27 @@ impl Global { } } -pub mod render_ffi { +pub mod render_commands { use super::{ super::{Rect, RenderCommand}, RenderPass, }; - use crate::{id, RawString}; - use std::{convert::TryInto, ffi, num::NonZeroU32, slice}; + use crate::id; + use std::{convert::TryInto, num::NonZeroU32}; use wgt::{BufferAddress, BufferSize, Color, DynamicOffset, IndexFormat}; - /// # Safety - /// - /// This function is unsafe as there is no guarantee that the given pointer is - /// valid for `offset_length` elements. 
- #[no_mangle] - pub unsafe extern "C" fn wgpu_render_pass_set_bind_group( + pub fn wgpu_render_pass_set_bind_group( pass: &mut RenderPass, index: u32, bind_group_id: id::BindGroupId, - offsets: *const DynamicOffset, - offset_length: usize, + offsets: &[DynamicOffset], ) { - let redundant = unsafe { - pass.current_bind_groups.set_and_check_redundant( - bind_group_id, - index, - &mut pass.base.dynamic_offsets, - offsets, - offset_length, - ) - }; + let redundant = pass.current_bind_groups.set_and_check_redundant( + bind_group_id, + index, + &mut pass.base.dynamic_offsets, + offsets, + ); if redundant { return; @@ -2492,16 +2487,12 @@ pub mod render_ffi { pass.base.commands.push(RenderCommand::SetBindGroup { index, - num_dynamic_offsets: offset_length, + num_dynamic_offsets: offsets.len(), bind_group_id, }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_set_pipeline( - pass: &mut RenderPass, - pipeline_id: id::RenderPipelineId, - ) { + pub fn wgpu_render_pass_set_pipeline(pass: &mut RenderPass, pipeline_id: id::RenderPipelineId) { if pass.current_pipeline.set_and_check_redundant(pipeline_id) { return; } @@ -2511,8 +2502,7 @@ pub mod render_ffi { .push(RenderCommand::SetPipeline(pipeline_id)); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_set_vertex_buffer( + pub fn wgpu_render_pass_set_vertex_buffer( pass: &mut RenderPass, slot: u32, buffer_id: id::BufferId, @@ -2527,8 +2517,7 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_set_index_buffer( + pub fn wgpu_render_pass_set_index_buffer( pass: &mut RenderPass, buffer: id::BufferId, index_format: IndexFormat, @@ -2538,22 +2527,19 @@ pub mod render_ffi { pass.set_index_buffer(buffer, index_format, offset, size); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_set_blend_constant(pass: &mut RenderPass, color: &Color) { + pub fn wgpu_render_pass_set_blend_constant(pass: &mut RenderPass, color: &Color) { pass.base .commands .push(RenderCommand::SetBlendConstant(*color)); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_set_stencil_reference(pass: &mut RenderPass, value: u32) { + pub fn wgpu_render_pass_set_stencil_reference(pass: &mut RenderPass, value: u32) { pass.base .commands .push(RenderCommand::SetStencilReference(value)); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_set_viewport( + pub fn wgpu_render_pass_set_viewport( pass: &mut RenderPass, x: f32, y: f32, @@ -2569,8 +2555,7 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_set_scissor_rect( + pub fn wgpu_render_pass_set_scissor_rect( pass: &mut RenderPass, x: u32, y: u32, @@ -2582,17 +2567,11 @@ pub mod render_ffi { .push(RenderCommand::SetScissor(Rect { x, y, w, h })); } - /// # Safety - /// - /// This function is unsafe as there is no guarantee that the given pointer is - /// valid for `size_bytes` bytes. - #[no_mangle] - pub unsafe extern "C" fn wgpu_render_pass_set_push_constants( + pub fn wgpu_render_pass_set_push_constants( pass: &mut RenderPass, stages: wgt::ShaderStages, offset: u32, - size_bytes: u32, - data: *const u8, + data: &[u8], ) { assert_eq!( offset & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), @@ -2600,31 +2579,28 @@ pub mod render_ffi { "Push constant offset must be aligned to 4 bytes." ); assert_eq!( - size_bytes & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), + data.len() as u32 & (wgt::PUSH_CONSTANT_ALIGNMENT - 1), 0, "Push constant size must be aligned to 4 bytes." 
); - let data_slice = unsafe { slice::from_raw_parts(data, size_bytes as usize) }; let value_offset = pass.base.push_constant_data.len().try_into().expect( "Ran out of push constant space. Don't set 4gb of push constants per RenderPass.", ); pass.base.push_constant_data.extend( - data_slice - .chunks_exact(wgt::PUSH_CONSTANT_ALIGNMENT as usize) + data.chunks_exact(wgt::PUSH_CONSTANT_ALIGNMENT as usize) .map(|arr| u32::from_ne_bytes([arr[0], arr[1], arr[2], arr[3]])), ); pass.base.commands.push(RenderCommand::SetPushConstant { stages, offset, - size_bytes, + size_bytes: data.len() as u32, values_offset: Some(value_offset), }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_draw( + pub fn wgpu_render_pass_draw( pass: &mut RenderPass, vertex_count: u32, instance_count: u32, @@ -2639,8 +2615,7 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_draw_indexed( + pub fn wgpu_render_pass_draw_indexed( pass: &mut RenderPass, index_count: u32, instance_count: u32, @@ -2657,8 +2632,7 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_draw_indirect( + pub fn wgpu_render_pass_draw_indirect( pass: &mut RenderPass, buffer_id: id::BufferId, offset: BufferAddress, @@ -2671,8 +2645,7 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_draw_indexed_indirect( + pub fn wgpu_render_pass_draw_indexed_indirect( pass: &mut RenderPass, buffer_id: id::BufferId, offset: BufferAddress, @@ -2685,8 +2658,7 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_multi_draw_indirect( + pub fn wgpu_render_pass_multi_draw_indirect( pass: &mut RenderPass, buffer_id: id::BufferId, offset: BufferAddress, @@ -2700,8 +2672,7 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_multi_draw_indexed_indirect( + pub fn wgpu_render_pass_multi_draw_indexed_indirect( pass: &mut RenderPass, buffer_id: id::BufferId, offset: BufferAddress, @@ -2715,8 +2686,7 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_multi_draw_indirect_count( + pub fn wgpu_render_pass_multi_draw_indirect_count( pass: &mut RenderPass, buffer_id: id::BufferId, offset: BufferAddress, @@ -2736,8 +2706,7 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_multi_draw_indexed_indirect_count( + pub fn wgpu_render_pass_multi_draw_indexed_indirect_count( pass: &mut RenderPass, buffer_id: id::BufferId, offset: BufferAddress, @@ -2757,17 +2726,8 @@ pub mod render_ffi { }); } - /// # Safety - /// - /// This function is unsafe as there is no guarantee that the given `label` - /// is a valid null-terminated string. - #[no_mangle] - pub unsafe extern "C" fn wgpu_render_pass_push_debug_group( - pass: &mut RenderPass, - label: RawString, - color: u32, - ) { - let bytes = unsafe { ffi::CStr::from_ptr(label) }.to_bytes(); + pub fn wgpu_render_pass_push_debug_group(pass: &mut RenderPass, label: &str, color: u32) { + let bytes = label.as_bytes(); pass.base.string_data.extend_from_slice(bytes); pass.base.commands.push(RenderCommand::PushDebugGroup { @@ -2776,22 +2736,12 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_pop_debug_group(pass: &mut RenderPass) { + pub fn wgpu_render_pass_pop_debug_group(pass: &mut RenderPass) { pass.base.commands.push(RenderCommand::PopDebugGroup); } - /// # Safety - /// - /// This function is unsafe as there is no guarantee that the given `label` - /// is a valid null-terminated string. 
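Earlier in this hunk, `wgpu_render_pass_set_push_constants` now takes `data: &[u8]` and derives `size_bytes` from `data.len()`, keeping the 4-byte alignment checks and the packing of bytes into native-endian `u32` words. A standalone sketch of just that packing step; the local constant mirrors `wgt::PUSH_CONSTANT_ALIGNMENT`, and `pack_push_constants` is a made-up helper rather than a wgpu API:

const PUSH_CONSTANT_ALIGNMENT: u32 = 4;

fn pack_push_constants(offset: u32, data: &[u8]) -> Vec<u32> {
    assert_eq!(
        offset & (PUSH_CONSTANT_ALIGNMENT - 1),
        0,
        "Push constant offset must be aligned to 4 bytes."
    );
    assert_eq!(
        data.len() as u32 & (PUSH_CONSTANT_ALIGNMENT - 1),
        0,
        "Push constant size must be aligned to 4 bytes."
    );
    // Pack each aligned 4-byte chunk into one u32 word.
    data.chunks_exact(PUSH_CONSTANT_ALIGNMENT as usize)
        .map(|arr| u32::from_ne_bytes([arr[0], arr[1], arr[2], arr[3]]))
        .collect()
}

fn main() {
    let mut bytes = Vec::new();
    bytes.extend_from_slice(&1.0f32.to_ne_bytes());
    bytes.extend_from_slice(&2.0f32.to_ne_bytes());
    let words = pack_push_constants(0, &bytes);
    assert_eq!(words.len(), 2);
    assert_eq!(f32::from_ne_bytes(words[0].to_ne_bytes()), 1.0);
}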
- #[no_mangle] - pub unsafe extern "C" fn wgpu_render_pass_insert_debug_marker( - pass: &mut RenderPass, - label: RawString, - color: u32, - ) { - let bytes = unsafe { ffi::CStr::from_ptr(label) }.to_bytes(); + pub fn wgpu_render_pass_insert_debug_marker(pass: &mut RenderPass, label: &str, color: u32) { + let bytes = label.as_bytes(); pass.base.string_data.extend_from_slice(bytes); pass.base.commands.push(RenderCommand::InsertDebugMarker { @@ -2800,8 +2750,7 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_write_timestamp( + pub fn wgpu_render_pass_write_timestamp( pass: &mut RenderPass, query_set_id: id::QuerySetId, query_index: u32, @@ -2812,23 +2761,17 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_begin_occlusion_query( - pass: &mut RenderPass, - query_index: u32, - ) { + pub fn wgpu_render_pass_begin_occlusion_query(pass: &mut RenderPass, query_index: u32) { pass.base .commands .push(RenderCommand::BeginOcclusionQuery { query_index }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_end_occlusion_query(pass: &mut RenderPass) { + pub fn wgpu_render_pass_end_occlusion_query(pass: &mut RenderPass) { pass.base.commands.push(RenderCommand::EndOcclusionQuery); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_begin_pipeline_statistics_query( + pub fn wgpu_render_pass_begin_pipeline_statistics_query( pass: &mut RenderPass, query_set_id: id::QuerySetId, query_index: u32, @@ -2841,26 +2784,17 @@ pub mod render_ffi { }); } - #[no_mangle] - pub extern "C" fn wgpu_render_pass_end_pipeline_statistics_query(pass: &mut RenderPass) { + pub fn wgpu_render_pass_end_pipeline_statistics_query(pass: &mut RenderPass) { pass.base .commands .push(RenderCommand::EndPipelineStatisticsQuery); } - /// # Safety - /// - /// This function is unsafe as there is no guarantee that the given pointer is - /// valid for `render_bundle_ids_length` elements. 
- #[no_mangle] - pub unsafe extern "C" fn wgpu_render_pass_execute_bundles( + pub fn wgpu_render_pass_execute_bundles( pass: &mut RenderPass, - render_bundle_ids: *const id::RenderBundleId, - render_bundle_ids_length: usize, + render_bundle_ids: &[id::RenderBundleId], ) { - for &bundle_id in - unsafe { slice::from_raw_parts(render_bundle_ids, render_bundle_ids_length) } - { + for &bundle_id in render_bundle_ids { pass.base .commands .push(RenderCommand::ExecuteBundle(bundle_id)); diff --git a/third_party/rust/wgpu-core/src/command/transfer.rs b/third_party/rust/wgpu-core/src/command/transfer.rs index 8e98a4c9b9..84bc88e723 100644 --- a/third_party/rust/wgpu-core/src/command/transfer.rs +++ b/third_party/rust/wgpu-core/src/command/transfer.rs @@ -607,6 +607,11 @@ impl Global { let src_buffer = buffer_guard .get(source) .map_err(|_| TransferError::InvalidBuffer(source))?; + + if src_buffer.device.as_info().id() != device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + cmd_buf_data .trackers .buffers @@ -628,6 +633,11 @@ impl Global { let dst_buffer = buffer_guard .get(destination) .map_err(|_| TransferError::InvalidBuffer(destination))?; + + if dst_buffer.device.as_info().id() != device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + cmd_buf_data .trackers .buffers @@ -777,6 +787,10 @@ impl Global { .get(destination.texture) .map_err(|_| TransferError::InvalidTexture(destination.texture))?; + if dst_texture.device.as_info().id() != device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + let (hal_copy_size, array_layer_count) = validate_texture_copy_range( destination, &dst_texture.desc, @@ -807,6 +821,11 @@ impl Global { let src_buffer = buffer_guard .get(source.buffer) .map_err(|_| TransferError::InvalidBuffer(source.buffer))?; + + if src_buffer.device.as_info().id() != device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + tracker .buffers .set_single(src_buffer, hal::BufferUses::COPY_SRC) @@ -938,6 +957,10 @@ impl Global { .get(source.texture) .map_err(|_| TransferError::InvalidTexture(source.texture))?; + if src_texture.device.as_info().id() != device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + let (hal_copy_size, array_layer_count) = validate_texture_copy_range(source, &src_texture.desc, CopySide::Source, copy_size)?; @@ -989,6 +1012,11 @@ impl Global { let dst_buffer = buffer_guard .get(destination.buffer) .map_err(|_| TransferError::InvalidBuffer(destination.buffer))?; + + if dst_buffer.device.as_info().id() != device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + tracker .buffers .set_single(dst_buffer, hal::BufferUses::COPY_DST) @@ -1117,6 +1145,13 @@ impl Global { .get(destination.texture) .map_err(|_| TransferError::InvalidTexture(source.texture))?; + if src_texture.device.as_info().id() != device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + if dst_texture.device.as_info().id() != device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + // src and dst texture format must be copy-compatible // https://gpuweb.github.io/gpuweb/#copy-compatible if src_texture.desc.format.remove_srgb_suffix() diff --git a/third_party/rust/wgpu-core/src/device/any_device.rs b/third_party/rust/wgpu-core/src/device/any_device.rs index 693155a753..9e459c1a94 100644 --- a/third_party/rust/wgpu-core/src/device/any_device.rs +++ b/third_party/rust/wgpu-core/src/device/any_device.rs @@ -34,7 +34,7 @@ impl AnyDevice { unsafe fn 
drop_glue(ptr: *mut ()) { // Drop the arc this instance is holding. unsafe { - _ = Arc::from_raw(ptr.cast::()); + _ = Arc::from_raw(ptr.cast::()); } } diff --git a/third_party/rust/wgpu-core/src/device/bgl.rs b/third_party/rust/wgpu-core/src/device/bgl.rs index d606f049a3..911ac8a435 100644 --- a/third_party/rust/wgpu-core/src/device/bgl.rs +++ b/third_party/rust/wgpu-core/src/device/bgl.rs @@ -58,7 +58,7 @@ impl EntryMap { assert!(self.sorted); } - /// Create a new [`BindGroupLayoutEntryMap`] from a slice of [`wgt::BindGroupLayoutEntry`]s. + /// Create a new [`EntryMap`] from a slice of [`wgt::BindGroupLayoutEntry`]s. /// /// Errors if there are duplicate bindings or if any binding index is greater than /// the device's limits. diff --git a/third_party/rust/wgpu-core/src/device/global.rs b/third_party/rust/wgpu-core/src/device/global.rs index 0c97e1b504..be524840b8 100644 --- a/third_party/rust/wgpu-core/src/device/global.rs +++ b/third_party/rust/wgpu-core/src/device/global.rs @@ -11,6 +11,7 @@ use crate::{ id::{self, AdapterId, DeviceId, QueueId, SurfaceId}, init_tracker::TextureInitTracker, instance::{self, Adapter, Surface}, + lock::{rank, RwLock}, pipeline, present, resource::{self, BufferAccessResult}, resource::{BufferAccessError, BufferMapOperation, CreateBufferError, Resource}, @@ -20,7 +21,6 @@ use crate::{ use arrayvec::ArrayVec; use hal::Device as _; -use parking_lot::RwLock; use wgt::{BufferAddress, TextureFormat}; @@ -257,7 +257,7 @@ impl Global { hal::BufferUses::COPY_DST }; - let (id, resource) = fid.assign(buffer); + let (id, resource) = fid.assign(Arc::new(buffer)); api_log!("Device::create_buffer({desc:?}) -> {id:?}"); device @@ -572,7 +572,7 @@ impl Global { Err(error) => break error, }; - let (id, resource) = fid.assign(texture); + let (id, resource) = fid.assign(Arc::new(texture)); api_log!("Device::create_texture({desc:?}) -> {id:?}"); device @@ -643,10 +643,12 @@ impl Global { texture.hal_usage |= hal::TextureUses::COPY_DST; } - texture.initialization_status = - RwLock::new(TextureInitTracker::new(desc.mip_level_count, 0)); + texture.initialization_status = RwLock::new( + rank::TEXTURE_INITIALIZATION_STATUS, + TextureInitTracker::new(desc.mip_level_count, 0), + ); - let (id, resource) = fid.assign(texture); + let (id, resource) = fid.assign(Arc::new(texture)); api_log!("Device::create_texture({desc:?}) -> {id:?}"); device @@ -699,7 +701,7 @@ impl Global { let buffer = device.create_buffer_from_hal(hal_buffer, desc); - let (id, buffer) = fid.assign(buffer); + let (id, buffer) = fid.assign(Arc::new(buffer)); api_log!("Device::create_buffer -> {id:?}"); device @@ -818,7 +820,7 @@ impl Global { Err(e) => break e, }; - let (id, resource) = fid.assign(view); + let (id, resource) = fid.assign(Arc::new(view)); { let mut views = texture.views.lock(); @@ -900,7 +902,7 @@ impl Global { Err(e) => break e, }; - let (id, resource) = fid.assign(sampler); + let (id, resource) = fid.assign(Arc::new(sampler)); api_log!("Device::create_sampler -> {id:?}"); device.trackers.lock().samplers.insert_single(resource); @@ -982,7 +984,7 @@ impl Global { let bgl = device.create_bind_group_layout(&desc.label, entry_map, bgl::Origin::Pool)?; - let (id_inner, arc) = fid.take().unwrap().assign(bgl); + let (id_inner, arc) = fid.take().unwrap().assign(Arc::new(bgl)); id = Some(id_inner); Ok(arc) @@ -1063,7 +1065,7 @@ impl Global { Err(e) => break e, }; - let (id, _) = fid.assign(layout); + let (id, _) = fid.assign(Arc::new(layout)); api_log!("Device::create_pipeline_layout -> {id:?}"); return (id, 
None); }; @@ -1130,7 +1132,7 @@ impl Global { Err(e) => break e, }; - let (id, resource) = fid.assign(bind_group); + let (id, resource) = fid.assign(Arc::new(bind_group)); let weak_ref = Arc::downgrade(&resource); for range in &resource.used_texture_ranges { @@ -1170,6 +1172,20 @@ impl Global { } } + /// Create a shader module with the given `source`. + /// + ///
+ // NOTE: Keep this in sync with `naga::front::wgsl::parse_str`! + // NOTE: Keep this in sync with `wgpu::Device::create_shader_module`! + /// + /// This function may consume a lot of stack space. Compiler-enforced limits for parsing + /// recursion exist; if shader compilation runs into them, it will return an error gracefully. + /// However, on some build profiles and platforms, the default stack size for a thread may be + /// exceeded before this limit is reached during parsing. Callers should ensure that there is + /// enough stack space for this, particularly if calls to this method are exposed to user + /// input. + /// + ///
pub fn device_create_shader_module( &self, device_id: DeviceId, @@ -1231,7 +1247,7 @@ impl Global { Err(e) => break e, }; - let (id, _) = fid.assign(shader); + let (id, _) = fid.assign(Arc::new(shader)); api_log!("Device::create_shader_module -> {id:?}"); return (id, None); }; @@ -1288,7 +1304,7 @@ impl Global { Ok(shader) => shader, Err(e) => break e, }; - let (id, _) = fid.assign(shader); + let (id, _) = fid.assign(Arc::new(shader)); api_log!("Device::create_shader_module_spirv -> {id:?}"); return (id, None); }; @@ -1320,7 +1336,9 @@ impl Global { profiling::scope!("Device::create_command_encoder"); let hub = A::hub(self); - let fid = hub.command_buffers.prepare(id_in.map(|id| id.transmute())); + let fid = hub + .command_buffers + .prepare(id_in.map(|id| id.into_command_buffer_id())); let error = loop { let device = match hub.devices.get(device_id) { @@ -1335,9 +1353,6 @@ impl Global { }; let encoder = match device .command_allocator - .lock() - .as_mut() - .unwrap() .acquire_encoder(device.raw(), queue.raw.as_ref().unwrap()) { Ok(raw) => raw, @@ -1353,13 +1368,13 @@ impl Global { .map(|s| s.to_string()), ); - let (id, _) = fid.assign(command_buffer); + let (id, _) = fid.assign(Arc::new(command_buffer)); api_log!("Device::create_command_encoder -> {id:?}"); - return (id.transmute(), None); + return (id.into_command_encoder_id(), None); }; let id = fid.assign_error(desc.label.borrow_or_default()); - (id.transmute(), Some(error)) + (id.into_command_encoder_id(), Some(error)) } pub fn command_buffer_label(&self, id: id::CommandBufferId) -> String { @@ -1374,7 +1389,7 @@ impl Global { if let Some(cmd_buf) = hub .command_buffers - .unregister(command_encoder_id.transmute()) + .unregister(command_encoder_id.into_command_buffer_id()) { cmd_buf.data.lock().as_mut().unwrap().encoder.discard(); cmd_buf @@ -1386,7 +1401,7 @@ impl Global { pub fn command_buffer_drop(&self, command_buffer_id: id::CommandBufferId) { profiling::scope!("CommandBuffer::drop"); api_log!("CommandBuffer::drop {command_buffer_id:?}"); - self.command_encoder_drop::
(command_buffer_id.transmute()) + self.command_encoder_drop::(command_buffer_id.into_command_encoder_id()) } pub fn device_create_render_bundle_encoder( @@ -1446,7 +1461,7 @@ impl Global { Err(e) => break e, }; - let (id, resource) = fid.assign(render_bundle); + let (id, resource) = fid.assign(Arc::new(render_bundle)); api_log!("RenderBundleEncoder::finish -> {id:?}"); device.trackers.lock().bundles.insert_single(resource); return (id, None); @@ -1509,7 +1524,7 @@ impl Global { Err(err) => break err, }; - let (id, resource) = fid.assign(query_set); + let (id, resource) = fid.assign(Arc::new(query_set)); api_log!("Device::create_query_set -> {id:?}"); device.trackers.lock().query_sets.insert_single(resource); @@ -1587,7 +1602,7 @@ impl Global { Err(e) => break e, }; - let (id, resource) = fid.assign(pipeline); + let (id, resource) = fid.assign(Arc::new(pipeline)); api_log!("Device::create_render_pipeline -> {id:?}"); device @@ -1720,7 +1735,7 @@ impl Global { Err(e) => break e, }; - let (id, resource) = fid.assign(pipeline); + let (id, resource) = fid.assign(Arc::new(pipeline)); api_log!("Device::create_compute_pipeline -> {id:?}"); device @@ -1956,7 +1971,7 @@ impl Global { }; let caps = unsafe { - let suf = A::get_surface(surface); + let suf = A::surface_as_hal(surface); let adapter = &device.adapter; match adapter.raw.adapter.surface_capabilities(suf.unwrap()) { Some(caps) => caps, @@ -2018,7 +2033,6 @@ impl Global { // Wait for all work to finish before configuring the surface. let snatch_guard = device.snatchable_lock.read(); let fence = device.fence.read(); - let fence = fence.as_ref().unwrap(); match device.maintain(fence, wgt::Maintain::Wait, snatch_guard) { Ok((closures, _)) => { user_callbacks = closures; @@ -2042,7 +2056,7 @@ impl Global { // https://github.com/gfx-rs/wgpu/issues/4105 match unsafe { - A::get_surface(surface) + A::surface_as_hal(surface) .unwrap() .configure(device.raw(), &hal_config) } { @@ -2107,7 +2121,7 @@ impl Global { .map_err(|_| DeviceError::Invalid)?; if let wgt::Maintain::WaitForSubmissionIndex(submission_index) = maintain { - if submission_index.queue_id != device_id.transmute() { + if submission_index.queue_id != device_id.into_queue_id() { return Err(WaitIdleError::WrongSubmissionIndex( submission_index.queue_id, device_id, @@ -2131,7 +2145,6 @@ impl Global { ) -> Result { let snatch_guard = device.snatchable_lock.read(); let fence = device.fence.read(); - let fence = fence.as_ref().unwrap(); let (closures, queue_empty) = device.maintain(fence, maintain, snatch_guard)?; // Some deferred destroys are scheduled in maintain so run this right after diff --git a/third_party/rust/wgpu-core/src/device/life.rs b/third_party/rust/wgpu-core/src/device/life.rs index af345015df..0df580e6e6 100644 --- a/third_party/rust/wgpu-core/src/device/life.rs +++ b/third_party/rust/wgpu-core/src/device/life.rs @@ -7,6 +7,7 @@ use crate::{ }, hal_api::HalApi, id, + lock::Mutex, pipeline::{ComputePipeline, RenderPipeline}, resource::{ self, Buffer, DestroyedBuffer, DestroyedTexture, QuerySet, Resource, Sampler, @@ -18,12 +19,10 @@ use crate::{ }; use smallvec::SmallVec; -use parking_lot::Mutex; use std::sync::Arc; use thiserror::Error; /// A struct that keeps lists of resources that are no longer needed by the user. 
-#[derive(Default)] pub(crate) struct ResourceMaps { pub buffers: FastHashMap>>, pub staging_buffers: FastHashMap>>, @@ -94,7 +93,7 @@ impl ResourceMaps { destroyed_textures.clear(); } - pub(crate) fn extend(&mut self, mut other: Self) { + pub(crate) fn extend(&mut self, other: &mut Self) { let ResourceMaps { buffers, staging_buffers, @@ -128,7 +127,37 @@ impl ResourceMaps { } } -/// Resources used by a queue submission, and work to be done once it completes. +/// A command submitted to the GPU for execution. +/// +/// ## Keeping resources alive while the GPU is using them +/// +/// [`wgpu_hal`] requires that, when a command is submitted to a queue, all the +/// resources it uses must remain alive until it has finished executing. +/// +/// The natural way to satisfy this would be for `ActiveSubmission` to hold +/// strong references to all the resources used by its commands. However, that +/// would entail dropping those strong references every time a queue submission +/// finishes, adjusting the reference counts of all the resources it used. This +/// is usually needless work: it's rare for the active submission queue to be +/// the final reference to an object. Usually the user is still holding on to +/// it. +/// +/// To avoid this, an `ActiveSubmission` does not initially hold any strong +/// references to its commands' resources. Instead, each resource tracks the +/// most recent submission index at which it has been used in +/// [`ResourceInfo::submission_index`]. When the user drops a resource, if the +/// submission in which it was last used is still present in the device's queue, +/// we add the resource to [`ActiveSubmission::last_resources`]. Finally, when +/// this `ActiveSubmission` is dequeued and dropped in +/// [`LifetimeTracker::triage_submissions`], we drop `last_resources` along with +/// it. Thus, unless a resource is dropped by the user, it doesn't need to be +/// touched at all when processing completed work. +/// +/// However, it's not clear that this is effective. See [#5560]. +/// +/// [`wgpu_hal`]: hal +/// [`ResourceInfo::submission_index`]: crate::resource::ResourceInfo +/// [#5560]: https://github.com/gfx-rs/wgpu/issues/5560 struct ActiveSubmission { /// The index of the submission we track. /// @@ -150,6 +179,18 @@ struct ActiveSubmission { /// Buffers to be mapped once this submission has completed. mapped: Vec>>, + /// Command buffers used by this submission, and the encoder that owns them. + /// + /// [`wgpu_hal::Queue::submit`] requires the submitted command buffers to + /// remain alive until the submission has completed execution. Command + /// encoders double as allocation pools for command buffers, so holding them + /// here and cleaning them up in [`LifetimeTracker::triage_submissions`] + /// satisfies that requirement. + /// + /// Once this submission has completed, the command buffers are reset and + /// the command encoder is recycled. + /// + /// [`wgpu_hal::Queue::submit`]: hal::Queue::submit encoders: Vec>, /// List of queue "on_submitted_work_done" closures to be called once this @@ -330,28 +371,25 @@ impl LifetimeTracker { /// /// Assume that all submissions up through `last_done` have completed. /// - /// - Buffers used by those submissions are now ready to map, if - /// requested. Add any buffers in the submission's [`mapped`] list to - /// [`self.ready_to_map`], where [`LifetimeTracker::handle_mapping`] will find - /// them. + /// - Buffers used by those submissions are now ready to map, if requested. 
+ /// Add any buffers in the submission's [`mapped`] list to + /// [`self.ready_to_map`], where [`LifetimeTracker::handle_mapping`] + /// will find them. /// /// - Resources whose final use was in those submissions are now ready to - /// free. Add any resources in the submission's [`last_resources`] table - /// to [`self.free_resources`], where [`LifetimeTracker::cleanup`] will find - /// them. + /// free. Dropping the submission's [`last_resources`] table does so. /// /// Return a list of [`SubmittedWorkDoneClosure`]s to run. /// /// [`mapped`]: ActiveSubmission::mapped /// [`self.ready_to_map`]: LifetimeTracker::ready_to_map /// [`last_resources`]: ActiveSubmission::last_resources - /// [`self.free_resources`]: LifetimeTracker::free_resources /// [`SubmittedWorkDoneClosure`]: crate::device::queue::SubmittedWorkDoneClosure #[must_use] pub fn triage_submissions( &mut self, last_done: SubmissionIndex, - command_allocator: &mut super::CommandAllocator, + command_allocator: &crate::command::CommandAllocator, ) -> SmallVec<[SubmittedWorkDoneClosure; 1]> { profiling::scope!("triage_submissions"); @@ -558,6 +596,18 @@ impl LifetimeTracker { &mut trackers.textures, |maps| &mut maps.textures, ); + + // We may have been suspected because a texture view or bind group + // referring to us was dropped. Remove stale weak references, so that + // the backlink table doesn't grow without bound. + for texture in self.suspected_resources.textures.values() { + texture.views.lock().retain(|view| view.strong_count() > 0); + texture + .bind_groups + .lock() + .retain(|bg| bg.strong_count() > 0); + } + self } @@ -583,6 +633,13 @@ impl LifetimeTracker { |maps| &mut maps.buffers, ); + // We may have been suspected because a bind group referring to us was + // dropped. Remove stale weak references, so that the backlink table + // doesn't grow without bound. + for buffer in self.suspected_resources.buffers.values() { + buffer.bind_groups.lock().retain(|bg| bg.strong_count() > 0); + } + self } @@ -693,13 +750,10 @@ impl LifetimeTracker { /// Identify resources to free, according to `trackers` and `self.suspected_resources`. /// - /// Given `trackers`, the [`Tracker`] belonging to same [`Device`] as - /// `self`, and `hub`, the [`Hub`] to which that `Device` belongs: - /// - /// Remove from `trackers` each resource mentioned in - /// [`self.suspected_resources`]. If `trackers` held the final reference to - /// that resource, add it to the appropriate free list, to be destroyed by - /// the hal: + /// Remove from `trackers`, the [`Tracker`] belonging to same [`Device`] as + /// `self`, each resource mentioned in [`self.suspected_resources`]. If + /// `trackers` held the final reference to that resource, add it to the + /// appropriate free list, to be destroyed by the hal: /// /// - Add resources used by queue submissions still in flight to the /// [`last_resources`] table of the last such submission's entry in @@ -799,29 +853,33 @@ impl LifetimeTracker { *buffer.map_state.lock() = resource::BufferMapState::Idle; log::trace!("Buffer ready to map {tracker_index:?} is not tracked anymore"); } else { - let mapping = match std::mem::replace( + // This _cannot_ be inlined into the match. If it is, the lock will be held + // open through the whole match, resulting in a deadlock when we try to re-lock + // the buffer back to active. 
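The comment above describes a real borrow-lifetime hazard: a temporary lock guard created in a `match` scrutinee lives until the end of the whole `match`, so re-locking the same mutex inside an arm never makes progress. A minimal sketch of the fix applied here, using a generic state enum and `std::sync::Mutex` rather than wgpu's `BufferMapState` and its lock types:

use std::sync::Mutex;

#[derive(Debug)]
enum State {
    Idle,
    Waiting(u32),
    Active(u32),
}

fn advance(state: &Mutex<State>) {
    // Take the value out first; the guard from `lock()` is dropped at the end
    // of this statement, so the mutex is free again inside the match arms.
    let taken = std::mem::replace(&mut *state.lock().unwrap(), State::Idle);
    match taken {
        State::Waiting(n) => *state.lock().unwrap() = State::Active(n),
        // Put an already-active value back untouched.
        State::Active(_) => *state.lock().unwrap() = taken,
        State::Idle => {}
    }
    // Writing `match *state.lock().unwrap() { ... }` instead would keep the
    // guard alive across the arms above, and the inner `lock()` calls would
    // then block (or panic) instead of completing.
}

fn main() {
    let state = Mutex::new(State::Waiting(7));
    advance(&state);
    assert!(matches!(*state.lock().unwrap(), State::Active(7)));
}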
+ let mapping = std::mem::replace( &mut *buffer.map_state.lock(), resource::BufferMapState::Idle, - ) { + ); + let pending_mapping = match mapping { resource::BufferMapState::Waiting(pending_mapping) => pending_mapping, // Mapping cancelled resource::BufferMapState::Idle => continue, // Mapping queued at least twice by map -> unmap -> map // and was already successfully mapped below - active @ resource::BufferMapState::Active { .. } => { - *buffer.map_state.lock() = active; + resource::BufferMapState::Active { .. } => { + *buffer.map_state.lock() = mapping; continue; } _ => panic!("No pending mapping."), }; - let status = if mapping.range.start != mapping.range.end { + let status = if pending_mapping.range.start != pending_mapping.range.end { log::debug!("Buffer {tracker_index:?} map state -> Active"); - let host = mapping.op.host; - let size = mapping.range.end - mapping.range.start; + let host = pending_mapping.op.host; + let size = pending_mapping.range.end - pending_mapping.range.start; match super::map_buffer( raw, &buffer, - mapping.range.start, + pending_mapping.range.start, size, host, snatch_guard, @@ -829,7 +887,8 @@ impl LifetimeTracker { Ok(ptr) => { *buffer.map_state.lock() = resource::BufferMapState::Active { ptr, - range: mapping.range.start..mapping.range.start + size, + range: pending_mapping.range.start + ..pending_mapping.range.start + size, host, }; Ok(()) @@ -842,12 +901,12 @@ impl LifetimeTracker { } else { *buffer.map_state.lock() = resource::BufferMapState::Active { ptr: std::ptr::NonNull::dangling(), - range: mapping.range, - host: mapping.op.host, + range: pending_mapping.range, + host: pending_mapping.op.host, }; Ok(()) }; - pending_callbacks.push((mapping.op, status)); + pending_callbacks.push((pending_mapping.op, status)); } } pending_callbacks diff --git a/third_party/rust/wgpu-core/src/device/mod.rs b/third_party/rust/wgpu-core/src/device/mod.rs index e2ab6c2690..e9da11b7a8 100644 --- a/third_party/rust/wgpu-core/src/device/mod.rs +++ b/third_party/rust/wgpu-core/src/device/mod.rs @@ -4,7 +4,6 @@ use crate::{ hub::Hub, id::{BindGroupLayoutId, PipelineLayoutId}, resource::{Buffer, BufferAccessError, BufferAccessResult, BufferMapOperation}, - resource_log, snatch::SnatchGuard, Label, DOWNLEVEL_ERROR_MESSAGE, }; @@ -377,42 +376,6 @@ fn map_buffer( Ok(mapping.ptr) } -pub(crate) struct CommandAllocator { - free_encoders: Vec, -} - -impl CommandAllocator { - fn acquire_encoder( - &mut self, - device: &A::Device, - queue: &A::Queue, - ) -> Result { - match self.free_encoders.pop() { - Some(encoder) => Ok(encoder), - None => unsafe { - let hal_desc = hal::CommandEncoderDescriptor { label: None, queue }; - device.create_command_encoder(&hal_desc) - }, - } - } - - fn release_encoder(&mut self, encoder: A::CommandEncoder) { - self.free_encoders.push(encoder); - } - - fn dispose(self, device: &A::Device) { - resource_log!( - "CommandAllocator::dispose encoders {}", - self.free_encoders.len() - ); - for cmd_encoder in self.free_encoders { - unsafe { - device.destroy_command_encoder(cmd_encoder); - } - } - } -} - #[derive(Clone, Debug, Error)] #[error("Device is invalid")] pub struct InvalidDevice; diff --git a/third_party/rust/wgpu-core/src/device/queue.rs b/third_party/rust/wgpu-core/src/device/queue.rs index 3cb5f695a7..f0db961ffc 100644 --- a/third_party/rust/wgpu-core/src/device/queue.rs +++ b/third_party/rust/wgpu-core/src/device/queue.rs @@ -4,16 +4,17 @@ use crate::{ api_log, command::{ extract_texture_selector, validate_linear_texture_data, 
validate_texture_copy_range, - ClearError, CommandBuffer, CopySide, ImageCopyTexture, TransferError, + ClearError, CommandAllocator, CommandBuffer, CopySide, ImageCopyTexture, TransferError, }, conv, - device::{life::ResourceMaps, DeviceError, WaitIdleError}, + device::{DeviceError, WaitIdleError}, get_lowest_common_denom, global::Global, hal_api::HalApi, hal_label, id::{self, DeviceId, QueueId}, init_tracker::{has_copy_partial_init_tracker_coverage, TextureInitRange}, + lock::{rank, Mutex, RwLockWriteGuard}, resource::{ Buffer, BufferAccessError, BufferMapState, DestroyedBuffer, DestroyedTexture, Resource, ResourceInfo, ResourceType, StagingBuffer, Texture, TextureInner, @@ -22,7 +23,6 @@ use crate::{ }; use hal::{CommandEncoder as _, Device as _, Queue as _}; -use parking_lot::Mutex; use smallvec::SmallVec; use std::{ @@ -34,9 +34,9 @@ use thiserror::Error; use super::Device; pub struct Queue { - pub device: Option>>, - pub raw: Option, - pub info: ResourceInfo>, + pub(crate) device: Option>>, + pub(crate) raw: Option, + pub(crate) info: ResourceInfo>, } impl Resource for Queue { @@ -152,13 +152,21 @@ pub enum TempResource { Texture(Arc>), } -/// A queue execution for a particular command encoder. +/// A series of raw [`CommandBuffer`]s that have been submitted to a +/// queue, and the [`wgpu_hal::CommandEncoder`] that built them. +/// +/// [`CommandBuffer`]: hal::Api::CommandBuffer +/// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder pub(crate) struct EncoderInFlight { raw: A::CommandEncoder, cmd_buffers: Vec, } impl EncoderInFlight { + /// Free all of our command buffers. + /// + /// Return the command encoder, fully reset and ready to be + /// reused. pub(crate) unsafe fn land(mut self) -> A::CommandEncoder { unsafe { self.raw.reset_all(self.cmd_buffers.into_iter()) }; self.raw @@ -192,6 +200,8 @@ pub(crate) struct PendingWrites { /// True if `command_encoder` is in the "recording" state, as /// described in the docs for the [`wgpu_hal::CommandEncoder`] /// trait. + /// + /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder pub is_recording: bool, pub temp_resources: Vec>, @@ -253,7 +263,7 @@ impl PendingWrites { #[must_use] fn post_submit( &mut self, - command_allocator: &mut super::CommandAllocator, + command_allocator: &CommandAllocator, device: &A::Device, queue: &A::Queue, ) -> Option> { @@ -307,7 +317,7 @@ fn prepare_staging_buffer( let mapping = unsafe { device.raw().map_buffer(&buffer, 0..size) }?; let staging_buffer = StagingBuffer { - raw: Mutex::new(Some(buffer)), + raw: Mutex::new(rank::STAGING_BUFFER_RAW, Some(buffer)), device: device.clone(), size, info: ResourceInfo::new( @@ -490,7 +500,7 @@ impl Global { prepare_staging_buffer(device, buffer_size.get(), device.instance_flags)?; let fid = hub.staging_buffers.prepare(id_in); - let (id, _) = fid.assign(staging_buffer); + let (id, _) = fid.assign(Arc::new(staging_buffer)); resource_log!("Queue::create_staging_buffer {id:?}"); Ok((id, staging_buffer_ptr)) @@ -707,7 +717,7 @@ impl Global { .get(destination.texture) .map_err(|_| TransferError::InvalidTexture(destination.texture))?; - if dst.device.as_info().id() != queue_id.transmute() { + if dst.device.as_info().id().into_queue_id() != queue_id { return Err(DeviceError::WrongDevice.into()); } @@ -1152,8 +1162,8 @@ impl Global { let snatch_guard = device.snatchable_lock.read(); // Fence lock must be acquired after the snatch lock everywhere to avoid deadlocks. 
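The lock-ordering comment above is the classic deadlock-avoidance rule: every path must acquire the two locks in the same order. A small self-contained illustration of why a fixed order matters, using plain `std::sync` locks as stand-ins for wgpu's snatchable and fence locks:

use std::sync::{Arc, Mutex};
use std::thread;

fn main() {
    let snatch = Arc::new(Mutex::new(()));
    let fence = Arc::new(Mutex::new(0u64));

    let handles: Vec<_> = (0..4)
        .map(|_| {
            let snatch = Arc::clone(&snatch);
            let fence = Arc::clone(&fence);
            thread::spawn(move || {
                // Every thread takes `snatch` first and `fence` second. If one
                // path took them in the opposite order, two threads could each
                // hold one lock while waiting on the other and never progress.
                let _snatch_guard = snatch.lock().unwrap();
                let mut fence_value = fence.lock().unwrap();
                *fence_value += 1;
            })
        })
        .collect();

    for handle in handles {
        handle.join().unwrap();
    }
    assert_eq!(*fence.lock().unwrap(), 4);
}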
- let mut fence = device.fence.write(); - let fence = fence.as_mut().unwrap(); + let mut fence_guard = device.fence.write(); + let fence = fence_guard.as_mut().unwrap(); let submit_index = device .active_submission_index .fetch_add(1, Ordering::Relaxed) @@ -1173,11 +1183,6 @@ impl Global { //TODO: if multiple command buffers are submitted, we can re-use the last // native command buffer of the previous chain instead of always creating // a temporary one, since the chains are not finished. - let mut temp_suspected = device.temp_suspected.lock(); - { - let mut suspected = temp_suspected.replace(ResourceMaps::new()).unwrap(); - suspected.clear(); - } // finish all the command buffers first for &cmb_id in command_buffer_ids { @@ -1191,7 +1196,7 @@ impl Global { Err(_) => continue, }; - if cmdbuf.device.as_info().id() != queue_id.transmute() { + if cmdbuf.device.as_info().id().into_queue_id() != queue_id { return Err(DeviceError::WrongDevice.into()); } @@ -1210,13 +1215,10 @@ impl Global { )); } if !cmdbuf.is_finished() { - if let Some(cmdbuf) = Arc::into_inner(cmdbuf) { - device.destroy_command_buffer(cmdbuf); - } else { - panic!( - "Command buffer cannot be destroyed because is still in use" - ); - } + let cmdbuf = Arc::into_inner(cmdbuf).expect( + "Command buffer cannot be destroyed because is still in use", + ); + device.destroy_command_buffer(cmdbuf); continue; } @@ -1228,41 +1230,23 @@ impl Global { // update submission IDs for buffer in cmd_buf_trackers.buffers.used_resources() { - let tracker_index = buffer.info.tracker_index(); - let raw_buf = match buffer.raw.get(&snatch_guard) { - Some(raw) => raw, - None => { - return Err(QueueSubmitError::DestroyedBuffer( - buffer.info.id(), - )); - } - }; + if buffer.raw.get(&snatch_guard).is_none() { + return Err(QueueSubmitError::DestroyedBuffer( + buffer.info.id(), + )); + } buffer.info.use_at(submit_index); - if buffer.is_unique() { - if let BufferMapState::Active { .. 
} = *buffer.map_state.lock() - { - log::warn!("Dropped buffer has a pending mapping."); - unsafe { device.raw().unmap_buffer(raw_buf) } - .map_err(DeviceError::from)?; - } - temp_suspected - .as_mut() - .unwrap() - .buffers - .insert(tracker_index, buffer.clone()); - } else { - match *buffer.map_state.lock() { - BufferMapState::Idle => (), - _ => { - return Err(QueueSubmitError::BufferStillMapped( - buffer.info.id(), - )) - } + + match *buffer.map_state.lock() { + BufferMapState::Idle => (), + _ => { + return Err(QueueSubmitError::BufferStillMapped( + buffer.info.id(), + )) } } } for texture in cmd_buf_trackers.textures.used_resources() { - let tracker_index = texture.info.tracker_index(); let should_extend = match texture.inner.get(&snatch_guard) { None => { return Err(QueueSubmitError::DestroyedTexture( @@ -1279,13 +1263,6 @@ impl Global { } }; texture.info.use_at(submit_index); - if texture.is_unique() { - temp_suspected - .as_mut() - .unwrap() - .textures - .insert(tracker_index, texture.clone()); - } if should_extend { unsafe { used_surface_textures @@ -1296,12 +1273,6 @@ impl Global { } for texture_view in cmd_buf_trackers.views.used_resources() { texture_view.info.use_at(submit_index); - if texture_view.is_unique() { - temp_suspected.as_mut().unwrap().texture_views.insert( - texture_view.as_info().tracker_index(), - texture_view.clone(), - ); - } } { for bg in cmd_buf_trackers.bind_groups.used_resources() { @@ -1315,13 +1286,6 @@ impl Global { for sampler in bg.used.samplers.used_resources() { sampler.info.use_at(submit_index); } - if bg.is_unique() { - temp_suspected - .as_mut() - .unwrap() - .bind_groups - .insert(bg.as_info().tracker_index(), bg.clone()); - } } } // assert!(cmd_buf_trackers.samplers.is_empty()); @@ -1329,32 +1293,14 @@ impl Global { cmd_buf_trackers.compute_pipelines.used_resources() { compute_pipeline.info.use_at(submit_index); - if compute_pipeline.is_unique() { - temp_suspected.as_mut().unwrap().compute_pipelines.insert( - compute_pipeline.as_info().tracker_index(), - compute_pipeline.clone(), - ); - } } for render_pipeline in cmd_buf_trackers.render_pipelines.used_resources() { render_pipeline.info.use_at(submit_index); - if render_pipeline.is_unique() { - temp_suspected.as_mut().unwrap().render_pipelines.insert( - render_pipeline.as_info().tracker_index(), - render_pipeline.clone(), - ); - } } for query_set in cmd_buf_trackers.query_sets.used_resources() { query_set.info.use_at(submit_index); - if query_set.is_unique() { - temp_suspected.as_mut().unwrap().query_sets.insert( - query_set.as_info().tracker_index(), - query_set.clone(), - ); - } } for bundle in cmd_buf_trackers.bundles.used_resources() { bundle.info.use_at(submit_index); @@ -1369,13 +1315,6 @@ impl Global { for query_set in bundle.used.query_sets.read().used_resources() { query_set.info.use_at(submit_index); } - if bundle.is_unique() { - temp_suspected - .as_mut() - .unwrap() - .render_bundles - .insert(bundle.as_info().tracker_index(), bundle.clone()); - } } } let mut baked = cmdbuf.from_arc_into_baked(); @@ -1452,8 +1391,8 @@ impl Global { } } - let mut pending_writes = device.pending_writes.lock(); - let pending_writes = pending_writes.as_mut().unwrap(); + let mut pending_writes_guard = device.pending_writes.lock(); + let pending_writes = pending_writes_guard.as_mut().unwrap(); { used_surface_textures.set_size(hub.textures.read().len()); @@ -1528,7 +1467,7 @@ impl Global { profiling::scope!("cleanup"); if let Some(pending_execution) = pending_writes.post_submit( - 
device.command_allocator.lock().as_mut().unwrap(), + &device.command_allocator, device.raw(), queue.raw.as_ref().unwrap(), ) { @@ -1543,18 +1482,22 @@ impl Global { active_executions, ); - // This will schedule destruction of all resources that are no longer needed - // by the user but used in the command stream, among other things. - let (closures, _) = match device.maintain(fence, wgt::Maintain::Poll, snatch_guard) { - Ok(closures) => closures, - Err(WaitIdleError::Device(err)) => return Err(QueueSubmitError::Queue(err)), - Err(WaitIdleError::StuckGpu) => return Err(QueueSubmitError::StuckGpu), - Err(WaitIdleError::WrongSubmissionIndex(..)) => unreachable!(), - }; - // pending_write_resources has been drained, so it's empty, but we // want to retain its heap allocation. pending_writes.temp_resources = pending_write_resources; + drop(pending_writes_guard); + + // This will schedule destruction of all resources that are no longer needed + // by the user but used in the command stream, among other things. + let fence_guard = RwLockWriteGuard::downgrade(fence_guard); + let (closures, _) = + match device.maintain(fence_guard, wgt::Maintain::Poll, snatch_guard) { + Ok(closures) => closures, + Err(WaitIdleError::Device(err)) => return Err(QueueSubmitError::Queue(err)), + Err(WaitIdleError::StuckGpu) => return Err(QueueSubmitError::StuckGpu), + Err(WaitIdleError::WrongSubmissionIndex(..)) => unreachable!(), + }; + device.lock_life().post_submit(); (submit_index, closures) diff --git a/third_party/rust/wgpu-core/src/device/resource.rs b/third_party/rust/wgpu-core/src/device/resource.rs index 4892aecb75..2541af7c70 100644 --- a/third_party/rust/wgpu-core/src/device/resource.rs +++ b/third_party/rust/wgpu-core/src/device/resource.rs @@ -7,18 +7,20 @@ use crate::{ bgl, life::{LifetimeTracker, WaitIdleError}, queue::PendingWrites, - AttachmentData, CommandAllocator, DeviceLostInvocation, MissingDownlevelFlags, - MissingFeatures, RenderPassContext, CLEANUP_WAIT_MS, + AttachmentData, DeviceLostInvocation, MissingDownlevelFlags, MissingFeatures, + RenderPassContext, CLEANUP_WAIT_MS, }, hal_api::HalApi, hal_label, hub::Hub, + id, init_tracker::{ BufferInitTracker, BufferInitTrackerAction, MemoryInitKind, TextureInitRange, TextureInitTracker, TextureInitTrackerAction, }, instance::Adapter, - pipeline, + lock::{rank, Mutex, MutexGuard, RwLock}, + pipeline::{self}, pool::ResourcePool, registry::Registry, resource::{ @@ -41,7 +43,6 @@ use crate::{ use arrayvec::ArrayVec; use hal::{CommandEncoder as _, Device as _}; use once_cell::sync::OnceCell; -use parking_lot::{Mutex, MutexGuard, RwLock}; use smallvec::SmallVec; use thiserror::Error; @@ -97,7 +98,7 @@ pub struct Device { pub(crate) zero_buffer: Option, pub(crate) info: ResourceInfo>, - pub(crate) command_allocator: Mutex>>, + pub(crate) command_allocator: command::CommandAllocator, //Note: The submission index here corresponds to the last submission that is done. pub(crate) active_submission_index: AtomicU64, //SubmissionIndex, // NOTE: if both are needed, the `snatchable_lock` must be consistently acquired before the @@ -126,9 +127,6 @@ pub struct Device { pub(crate) tracker_indices: TrackerIndexAllocators, // Life tracker should be locked right after the device and before anything else. life_tracker: Mutex>, - /// Temporary storage for resource management functions. Cleared at the end - /// of every call (unless an error occurs). - pub(crate) temp_suspected: Mutex>>, /// Pool of bind group layouts, allowing deduplication. 
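The `bgl_pool` field declared just below is documented as a pool of bind group layouts that allows deduplication. One common shape for such a pool is a map of weak references keyed by the descriptor, sketched here as an illustration of the idea only, not of wgpu's actual `ResourcePool` implementation:

use std::collections::HashMap;
use std::sync::{Arc, Mutex, Weak};

struct Pool<K, V> {
    entries: Mutex<HashMap<K, Weak<V>>>,
}

impl<K: std::hash::Hash + Eq, V> Pool<K, V> {
    fn new() -> Self {
        Pool { entries: Mutex::new(HashMap::new()) }
    }

    // Return the existing value for `key` if one is still alive,
    // otherwise build a new one and remember it.
    fn get_or_create(&self, key: K, create: impl FnOnce() -> V) -> Arc<V> {
        let mut entries = self.entries.lock().unwrap();
        if let Some(existing) = entries.get(&key).and_then(|weak| weak.upgrade()) {
            return existing;
        }
        let fresh = Arc::new(create());
        entries.insert(key, Arc::downgrade(&fresh));
        fresh
    }
}

fn main() {
    let pool: Pool<String, Vec<u32>> = Pool::new();
    let a = pool.get_or_create("layout".to_string(), || vec![1, 2, 3]);
    let b = pool.get_or_create("layout".to_string(), || vec![9, 9, 9]);
    // Identical keys deduplicate to the same allocation.
    assert!(Arc::ptr_eq(&a, &b));
}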
pub(crate) bgl_pool: ResourcePool>, pub(crate) alignments: hal::Alignments, @@ -141,6 +139,10 @@ pub struct Device { #[cfg(feature = "trace")] pub(crate) trace: Mutex>, pub(crate) usage_scopes: UsageScopePool, + + /// Temporary storage, cleared at the start of every call, + /// retained only to save allocations. + temp_suspected: Mutex>>, } pub(crate) enum DeferredDestroy { @@ -165,7 +167,7 @@ impl Drop for Device { let raw = self.raw.take().unwrap(); let pending_writes = self.pending_writes.lock().take().unwrap(); pending_writes.dispose(&raw); - self.command_allocator.lock().take().unwrap().dispose(&raw); + self.command_allocator.dispose(&raw); unsafe { raw.destroy_buffer(self.zero_buffer.take().unwrap()); raw.destroy_fence(self.fence.write().take().unwrap()); @@ -223,10 +225,8 @@ impl Device { let fence = unsafe { raw_device.create_fence() }.map_err(|_| CreateDeviceError::OutOfMemory)?; - let mut com_alloc = CommandAllocator { - free_encoders: Vec::new(), - }; - let pending_encoder = com_alloc + let command_allocator = command::CommandAllocator::new(); + let pending_encoder = command_allocator .acquire_encoder(&raw_device, raw_queue) .map_err(|_| CreateDeviceError::OutOfMemory)?; let mut pending_writes = queue::PendingWrites::::new(pending_encoder); @@ -271,38 +271,44 @@ impl Device { queue_to_drop: OnceCell::new(), zero_buffer: Some(zero_buffer), info: ResourceInfo::new("", None), - command_allocator: Mutex::new(Some(com_alloc)), + command_allocator, active_submission_index: AtomicU64::new(0), - fence: RwLock::new(Some(fence)), - snatchable_lock: unsafe { SnatchLock::new() }, + fence: RwLock::new(rank::DEVICE_FENCE, Some(fence)), + snatchable_lock: unsafe { SnatchLock::new(rank::DEVICE_SNATCHABLE_LOCK) }, valid: AtomicBool::new(true), - trackers: Mutex::new(Tracker::new()), + trackers: Mutex::new(rank::DEVICE_TRACKERS, Tracker::new()), tracker_indices: TrackerIndexAllocators::new(), - life_tracker: Mutex::new(life::LifetimeTracker::new()), - temp_suspected: Mutex::new(Some(life::ResourceMaps::new())), + life_tracker: Mutex::new(rank::DEVICE_LIFE_TRACKER, life::LifetimeTracker::new()), + temp_suspected: Mutex::new( + rank::DEVICE_TEMP_SUSPECTED, + Some(life::ResourceMaps::new()), + ), bgl_pool: ResourcePool::new(), #[cfg(feature = "trace")] - trace: Mutex::new(trace_path.and_then(|path| match trace::Trace::new(path) { - Ok(mut trace) => { - trace.add(trace::Action::Init { - desc: desc.clone(), - backend: A::VARIANT, - }); - Some(trace) - } - Err(e) => { - log::error!("Unable to start a trace in '{path:?}': {e}"); - None - } - })), + trace: Mutex::new( + rank::DEVICE_TRACE, + trace_path.and_then(|path| match trace::Trace::new(path) { + Ok(mut trace) => { + trace.add(trace::Action::Init { + desc: desc.clone(), + backend: A::VARIANT, + }); + Some(trace) + } + Err(e) => { + log::error!("Unable to start a trace in '{path:?}': {e}"); + None + } + }), + ), alignments, limits: desc.required_limits.clone(), features: desc.required_features, downlevel, instance_flags, - pending_writes: Mutex::new(Some(pending_writes)), - deferred_destroy: Mutex::new(Vec::new()), - usage_scopes: Default::default(), + pending_writes: Mutex::new(rank::DEVICE_PENDING_WRITES, Some(pending_writes)), + deferred_destroy: Mutex::new(rank::DEVICE_DEFERRED_DESTROY, Vec::new()), + usage_scopes: Mutex::new(rank::DEVICE_USAGE_SCOPES, Default::default()), }) } @@ -379,7 +385,7 @@ impl Device { /// Check this device for completed commands. 
/// - /// The `maintain` argument tells how the maintence function should behave, either + /// The `maintain` argument tells how the maintenance function should behave, either /// blocking or just polling the current state of the gpu. /// /// Return a pair `(closures, queue_empty)`, where: @@ -392,11 +398,12 @@ impl Device { /// return it to our callers.) pub(crate) fn maintain<'this>( &'this self, - fence: &A::Fence, + fence_guard: crate::lock::RwLockReadGuard>, maintain: wgt::Maintain, snatch_guard: SnatchGuard, ) -> Result<(UserClosures, bool), WaitIdleError> { profiling::scope!("Device::maintain"); + let fence = fence_guard.as_ref().unwrap(); let last_done_index = if maintain.is_wait() { let index_to_wait_for = match maintain { wgt::Maintain::WaitForSubmissionIndex(submission_index) => { @@ -425,28 +432,12 @@ impl Device { }; let mut life_tracker = self.lock_life(); - let submission_closures = life_tracker.triage_submissions( - last_done_index, - self.command_allocator.lock().as_mut().unwrap(), - ); - - { - // Normally, `temp_suspected` exists only to save heap - // allocations: it's cleared at the start of the function - // call, and cleared by the end. But `Global::queue_submit` is - // fallible; if it exits early, it may leave some resources in - // `temp_suspected`. - let temp_suspected = self - .temp_suspected - .lock() - .replace(ResourceMaps::new()) - .unwrap(); + let submission_closures = + life_tracker.triage_submissions(last_done_index, &self.command_allocator); - life_tracker.suspected_resources.extend(temp_suspected); + life_tracker.triage_suspected(&self.trackers); - life_tracker.triage_suspected(&self.trackers); - life_tracker.triage_mapped(); - } + life_tracker.triage_mapped(); let mapping_closures = life_tracker.handle_mapping(self.raw(), &self.trackers, &snatch_guard); @@ -478,6 +469,7 @@ impl Device { // Don't hold the locks while calling release_gpu_resources. drop(life_tracker); + drop(fence_guard); drop(snatch_guard); if should_release_gpu_resource { @@ -493,12 +485,14 @@ impl Device { } pub(crate) fn untrack(&self, trackers: &Tracker) { + // If we have a previously allocated `ResourceMap`, just use that. let mut temp_suspected = self .temp_suspected .lock() - .replace(ResourceMaps::new()) - .unwrap(); + .take() + .unwrap_or_else(|| ResourceMaps::new()); temp_suspected.clear(); + // As the tracker is cleared/dropped, we need to consider all the resources // that it references for destruction in the next GC pass. { @@ -559,7 +553,11 @@ impl Device { } } } - self.lock_life().suspected_resources.extend(temp_suspected); + self.lock_life() + .suspected_resources + .extend(&mut temp_suspected); + // Save this resource map for later reuse. 
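In the `untrack` change above, the resource map is taken out of its mutex, a fresh one is created only if none was stored, and the map is put back when done, purely so the heap allocation can be reused between calls. The same take/restore idiom in miniature, with a plain `Vec` standing in for `ResourceMaps`:

use std::sync::Mutex;

struct Scratch {
    buffer: Mutex<Option<Vec<u64>>>,
}

impl Scratch {
    fn with_buffer(&self, work: impl FnOnce(&mut Vec<u64>)) {
        // Reuse the previously allocated Vec if one is stored.
        let mut buffer = self.buffer.lock().unwrap().take().unwrap_or_default();
        buffer.clear();

        work(&mut buffer);

        // Put the (possibly grown) allocation back for the next call.
        *self.buffer.lock().unwrap() = Some(buffer);
    }
}

fn main() {
    let scratch = Scratch { buffer: Mutex::new(Some(Vec::new())) };
    scratch.with_buffer(|buf| buf.extend(0..1024));
    scratch.with_buffer(|buf| {
        // The second call starts empty but keeps the first call's capacity.
        assert!(buf.is_empty());
        assert!(buf.capacity() >= 1024);
        buf.push(1);
    });
}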
+ *self.temp_suspected.lock() = Some(temp_suspected); } pub(crate) fn create_buffer( @@ -653,14 +651,17 @@ impl Device { device: self.clone(), usage: desc.usage, size: desc.size, - initialization_status: RwLock::new(BufferInitTracker::new(aligned_size)), - sync_mapped_writes: Mutex::new(None), - map_state: Mutex::new(resource::BufferMapState::Idle), + initialization_status: RwLock::new( + rank::BUFFER_INITIALIZATION_STATUS, + BufferInitTracker::new(aligned_size), + ), + sync_mapped_writes: Mutex::new(rank::BUFFER_SYNC_MAPPED_WRITES, None), + map_state: Mutex::new(rank::BUFFER_MAP_STATE, resource::BufferMapState::Idle), info: ResourceInfo::new( desc.label.borrow_or_default(), Some(self.tracker_indices.buffers.clone()), ), - bind_groups: Mutex::new(Vec::new()), + bind_groups: Mutex::new(rank::BUFFER_BIND_GROUPS, Vec::new()), }) } @@ -680,10 +681,10 @@ impl Device { desc: desc.map_label(|_| ()), hal_usage, format_features, - initialization_status: RwLock::new(TextureInitTracker::new( - desc.mip_level_count, - desc.array_layer_count(), - )), + initialization_status: RwLock::new( + rank::TEXTURE_INITIALIZATION_STATUS, + TextureInitTracker::new(desc.mip_level_count, desc.array_layer_count()), + ), full_range: TextureSelector { mips: 0..desc.mip_level_count, layers: 0..desc.array_layer_count(), @@ -692,9 +693,9 @@ impl Device { desc.label.borrow_or_default(), Some(self.tracker_indices.textures.clone()), ), - clear_mode: RwLock::new(clear_mode), - views: Mutex::new(Vec::new()), - bind_groups: Mutex::new(Vec::new()), + clear_mode: RwLock::new(rank::TEXTURE_CLEAR_MODE, clear_mode), + views: Mutex::new(rank::TEXTURE_VIEWS, Vec::new()), + bind_groups: Mutex::new(rank::TEXTURE_BIND_GROUPS, Vec::new()), } } @@ -710,14 +711,17 @@ impl Device { device: self.clone(), usage: desc.usage, size: desc.size, - initialization_status: RwLock::new(BufferInitTracker::new(0)), - sync_mapped_writes: Mutex::new(None), - map_state: Mutex::new(resource::BufferMapState::Idle), + initialization_status: RwLock::new( + rank::BUFFER_INITIALIZATION_STATUS, + BufferInitTracker::new(0), + ), + sync_mapped_writes: Mutex::new(rank::BUFFER_SYNC_MAPPED_WRITES, None), + map_state: Mutex::new(rank::BUFFER_MAP_STATE, resource::BufferMapState::Idle), info: ResourceInfo::new( desc.label.borrow_or_default(), Some(self.tracker_indices.buffers.clone()), ), - bind_groups: Mutex::new(Vec::new()), + bind_groups: Mutex::new(rank::BUFFER_BIND_GROUPS, Vec::new()), } } @@ -1421,7 +1425,7 @@ impl Device { pipeline::ShaderModuleSource::Wgsl(code) => { profiling::scope!("naga::front::wgsl::parse_str"); let module = naga::front::wgsl::parse_str(&code).map_err(|inner| { - pipeline::CreateShaderModuleError::Parsing(pipeline::ShaderError { + pipeline::CreateShaderModuleError::Parsing(naga::error::ShaderError { source: code.to_string(), label: desc.label.as_ref().map(|l| l.to_string()), inner: Box::new(inner), @@ -1434,7 +1438,7 @@ impl Device { let parser = naga::front::spv::Frontend::new(spv.iter().cloned(), &options); profiling::scope!("naga::front::spv::Frontend"); let module = parser.parse().map_err(|inner| { - pipeline::CreateShaderModuleError::ParsingSpirV(pipeline::ShaderError { + pipeline::CreateShaderModuleError::ParsingSpirV(naga::error::ShaderError { source: String::new(), label: desc.label.as_ref().map(|l| l.to_string()), inner: Box::new(inner), @@ -1447,7 +1451,7 @@ impl Device { let mut parser = naga::front::glsl::Frontend::default(); profiling::scope!("naga::front::glsl::Frontend.parse"); let module = parser.parse(&options, 
&code).map_err(|inner| { - pipeline::CreateShaderModuleError::ParsingGlsl(pipeline::ShaderError { + pipeline::CreateShaderModuleError::ParsingGlsl(naga::error::ShaderError { source: code.to_string(), label: desc.label.as_ref().map(|l| l.to_string()), inner: Box::new(inner), @@ -1471,9 +1475,78 @@ impl Device { }; } - use naga::valid::Capabilities as Caps; profiling::scope!("naga::validate"); + let debug_source = + if self.instance_flags.contains(wgt::InstanceFlags::DEBUG) && !source.is_empty() { + Some(hal::DebugSource { + file_name: Cow::Owned( + desc.label + .as_ref() + .map_or("shader".to_string(), |l| l.to_string()), + ), + source_code: Cow::Owned(source.clone()), + }) + } else { + None + }; + + let info = self + .create_validator(naga::valid::ValidationFlags::all()) + .validate(&module) + .map_err(|inner| { + pipeline::CreateShaderModuleError::Validation(naga::error::ShaderError { + source, + label: desc.label.as_ref().map(|l| l.to_string()), + inner: Box::new(inner), + }) + })?; + + let interface = + validation::Interface::new(&module, &info, self.limits.clone(), self.features); + let hal_shader = hal::ShaderInput::Naga(hal::NagaShader { + module, + info, + debug_source, + }); + let hal_desc = hal::ShaderModuleDescriptor { + label: desc.label.to_hal(self.instance_flags), + runtime_checks: desc.shader_bound_checks.runtime_checks(), + }; + let raw = match unsafe { + self.raw + .as_ref() + .unwrap() + .create_shader_module(&hal_desc, hal_shader) + } { + Ok(raw) => raw, + Err(error) => { + return Err(match error { + hal::ShaderError::Device(error) => { + pipeline::CreateShaderModuleError::Device(error.into()) + } + hal::ShaderError::Compilation(ref msg) => { + log::error!("Shader error: {}", msg); + pipeline::CreateShaderModuleError::Generation + } + }) + } + }; + + Ok(pipeline::ShaderModule { + raw: Some(raw), + device: self.clone(), + interface: Some(interface), + info: ResourceInfo::new(desc.label.borrow_or_default(), None), + label: desc.label.borrow_or_default().to_string(), + }) + } + /// Create a validator with the given validation flags. 
+ pub fn create_validator( + self: &Arc, + flags: naga::valid::ValidationFlags, + ) -> naga::valid::Validator { + use naga::valid::Capabilities as Caps; let mut caps = Caps::empty(); caps.set( Caps::PUSH_CONSTANT, @@ -1541,69 +1614,36 @@ impl Device { .flags .contains(wgt::DownlevelFlags::CUBE_ARRAY_TEXTURES), ); + caps.set( + Caps::SUBGROUP, + self.features + .intersects(wgt::Features::SUBGROUP | wgt::Features::SUBGROUP_VERTEX), + ); + caps.set( + Caps::SUBGROUP_BARRIER, + self.features.intersects(wgt::Features::SUBGROUP_BARRIER), + ); - let debug_source = - if self.instance_flags.contains(wgt::InstanceFlags::DEBUG) && !source.is_empty() { - Some(hal::DebugSource { - file_name: Cow::Owned( - desc.label - .as_ref() - .map_or("shader".to_string(), |l| l.to_string()), - ), - source_code: Cow::Owned(source.clone()), - }) - } else { - None - }; - - let info = naga::valid::Validator::new(naga::valid::ValidationFlags::all(), caps) - .validate(&module) - .map_err(|inner| { - pipeline::CreateShaderModuleError::Validation(pipeline::ShaderError { - source, - label: desc.label.as_ref().map(|l| l.to_string()), - inner: Box::new(inner), - }) - })?; + let mut subgroup_stages = naga::valid::ShaderStages::empty(); + subgroup_stages.set( + naga::valid::ShaderStages::COMPUTE | naga::valid::ShaderStages::FRAGMENT, + self.features.contains(wgt::Features::SUBGROUP), + ); + subgroup_stages.set( + naga::valid::ShaderStages::VERTEX, + self.features.contains(wgt::Features::SUBGROUP_VERTEX), + ); - let interface = - validation::Interface::new(&module, &info, self.limits.clone(), self.features); - let hal_shader = hal::ShaderInput::Naga(hal::NagaShader { - module, - info, - debug_source, - }); - let hal_desc = hal::ShaderModuleDescriptor { - label: desc.label.to_hal(self.instance_flags), - runtime_checks: desc.shader_bound_checks.runtime_checks(), - }; - let raw = match unsafe { - self.raw - .as_ref() - .unwrap() - .create_shader_module(&hal_desc, hal_shader) - } { - Ok(raw) => raw, - Err(error) => { - return Err(match error { - hal::ShaderError::Device(error) => { - pipeline::CreateShaderModuleError::Device(error.into()) - } - hal::ShaderError::Compilation(ref msg) => { - log::error!("Shader error: {}", msg); - pipeline::CreateShaderModuleError::Generation - } - }) - } + let subgroup_operations = if caps.contains(Caps::SUBGROUP) { + use naga::valid::SubgroupOperationSet as S; + S::BASIC | S::VOTE | S::ARITHMETIC | S::BALLOT | S::SHUFFLE | S::SHUFFLE_RELATIVE + } else { + naga::valid::SubgroupOperationSet::empty() }; - - Ok(pipeline::ShaderModule { - raw: Some(raw), - device: self.clone(), - interface: Some(interface), - info: ResourceInfo::new(desc.label.borrow_or_default(), None), - label: desc.label.borrow_or_default().to_string(), - }) + let mut validator = naga::valid::Validator::new(flags, caps); + validator.subgroup_stages(subgroup_stages); + validator.subgroup_operations(subgroup_operations); + validator } #[allow(unused_unsafe)] @@ -1913,6 +1953,7 @@ impl Device { used: &mut BindGroupStates, storage: &'a Storage>, limits: &wgt::Limits, + device_id: id::Id, snatch_guard: &'a SnatchGuard<'a>, ) -> Result, binding_model::CreateBindGroupError> { use crate::binding_model::CreateBindGroupError as Error; @@ -1931,6 +1972,7 @@ impl Device { }) } }; + let (pub_usage, internal_use, range_limit) = match binding_ty { wgt::BufferBindingType::Uniform => ( wgt::BufferUsages::UNIFORM, @@ -1963,6 +2005,10 @@ impl Device { .add_single(storage, bb.buffer_id, internal_use) .ok_or(Error::InvalidBuffer(bb.buffer_id))?; + if 
buffer.device.as_info().id() != device_id { + return Err(DeviceError::WrongDevice.into()); + } + check_buffer_usage(bb.buffer_id, buffer.usage, pub_usage)?; let raw_buffer = buffer .raw @@ -2041,13 +2087,53 @@ impl Device { }) } - pub(crate) fn create_texture_binding( - view: &TextureView, - internal_use: hal::TextureUses, - pub_usage: wgt::TextureUsages, + fn create_sampler_binding<'a>( + used: &BindGroupStates, + storage: &'a Storage>, + id: id::Id, + device_id: id::Id, + ) -> Result<&'a Sampler, binding_model::CreateBindGroupError> { + use crate::binding_model::CreateBindGroupError as Error; + + let sampler = used + .samplers + .add_single(storage, id) + .ok_or(Error::InvalidSampler(id))?; + + if sampler.device.as_info().id() != device_id { + return Err(DeviceError::WrongDevice.into()); + } + + Ok(sampler) + } + + pub(crate) fn create_texture_binding<'a>( + self: &Arc, + binding: u32, + decl: &wgt::BindGroupLayoutEntry, + storage: &'a Storage>, + id: id::Id, used: &mut BindGroupStates, used_texture_ranges: &mut Vec>, - ) -> Result<(), binding_model::CreateBindGroupError> { + snatch_guard: &'a SnatchGuard<'a>, + ) -> Result, binding_model::CreateBindGroupError> { + use crate::binding_model::CreateBindGroupError as Error; + + let view = used + .views + .add_single(storage, id) + .ok_or(Error::InvalidTextureView(id))?; + + if view.device.as_info().id() != self.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + + let (pub_usage, internal_use) = self.texture_use_parameters( + binding, + decl, + view, + "SampledTexture, ReadonlyStorageTexture or WriteonlyStorageTexture", + )?; let texture = &view.parent; let texture_id = texture.as_info().id(); // Careful here: the texture may no longer have its own ref count, @@ -2077,7 +2163,12 @@ impl Device { kind: MemoryInitKind::NeedsInitializedMemory, }); - Ok(()) + Ok(hal::TextureBinding { + view: view + .raw(snatch_guard) + .ok_or(Error::InvalidTextureView(id))?, + usage: internal_use, + }) } // This function expects the provided bind group layout to be resolved @@ -2139,6 +2230,7 @@ impl Device { &mut used, &*buffer_guard, &self.limits, + self.as_info().id(), &snatch_guard, )?; @@ -2162,105 +2254,86 @@ impl Device { &mut used, &*buffer_guard, &self.limits, + self.as_info().id(), &snatch_guard, )?; hal_buffers.push(bb); } (res_index, num_bindings) } - Br::Sampler(id) => { - match decl.ty { - wgt::BindingType::Sampler(ty) => { - let sampler = used - .samplers - .add_single(&*sampler_guard, id) - .ok_or(Error::InvalidSampler(id))?; - - if sampler.device.as_info().id() != self.as_info().id() { - return Err(DeviceError::WrongDevice.into()); - } - - // Allowed sampler values for filtering and comparison - let (allowed_filtering, allowed_comparison) = match ty { - wgt::SamplerBindingType::Filtering => (None, false), - wgt::SamplerBindingType::NonFiltering => (Some(false), false), - wgt::SamplerBindingType::Comparison => (None, true), - }; - - if let Some(allowed_filtering) = allowed_filtering { - if allowed_filtering != sampler.filtering { - return Err(Error::WrongSamplerFiltering { - binding, - layout_flt: allowed_filtering, - sampler_flt: sampler.filtering, - }); - } - } + Br::Sampler(id) => match decl.ty { + wgt::BindingType::Sampler(ty) => { + let sampler = Self::create_sampler_binding( + &used, + &sampler_guard, + id, + self.as_info().id(), + )?; - if allowed_comparison != sampler.comparison { - return Err(Error::WrongSamplerComparison { + let (allowed_filtering, allowed_comparison) = match ty { + wgt::SamplerBindingType::Filtering 
=> (None, false), + wgt::SamplerBindingType::NonFiltering => (Some(false), false), + wgt::SamplerBindingType::Comparison => (None, true), + }; + if let Some(allowed_filtering) = allowed_filtering { + if allowed_filtering != sampler.filtering { + return Err(Error::WrongSamplerFiltering { binding, - layout_cmp: allowed_comparison, - sampler_cmp: sampler.comparison, + layout_flt: allowed_filtering, + sampler_flt: sampler.filtering, }); } - - let res_index = hal_samplers.len(); - hal_samplers.push(sampler.raw()); - (res_index, 1) } - _ => { - return Err(Error::WrongBindingType { + if allowed_comparison != sampler.comparison { + return Err(Error::WrongSamplerComparison { binding, - actual: decl.ty, - expected: "Sampler", - }) + layout_cmp: allowed_comparison, + sampler_cmp: sampler.comparison, + }); } + + let res_index = hal_samplers.len(); + hal_samplers.push(sampler.raw()); + (res_index, 1) } - } + _ => { + return Err(Error::WrongBindingType { + binding, + actual: decl.ty, + expected: "Sampler", + }) + } + }, Br::SamplerArray(ref bindings_array) => { let num_bindings = bindings_array.len(); Self::check_array_binding(self.features, decl.count, num_bindings)?; let res_index = hal_samplers.len(); for &id in bindings_array.iter() { - let sampler = used - .samplers - .add_single(&*sampler_guard, id) - .ok_or(Error::InvalidSampler(id))?; - if sampler.device.as_info().id() != self.as_info().id() { - return Err(DeviceError::WrongDevice.into()); - } + let sampler = Self::create_sampler_binding( + &used, + &sampler_guard, + id, + self.as_info().id(), + )?; + hal_samplers.push(sampler.raw()); } (res_index, num_bindings) } Br::TextureView(id) => { - let view = used - .views - .add_single(&*texture_view_guard, id) - .ok_or(Error::InvalidTextureView(id))?; - let (pub_usage, internal_use) = self.texture_use_parameters( + let tb = self.create_texture_binding( binding, decl, - view, - "SampledTexture, ReadonlyStorageTexture or WriteonlyStorageTexture", - )?; - Self::create_texture_binding( - view, - internal_use, - pub_usage, + &texture_view_guard, + id, &mut used, &mut used_texture_ranges, + &snatch_guard, )?; let res_index = hal_textures.len(); - hal_textures.push(hal::TextureBinding { - view: view - .raw(&snatch_guard) - .ok_or(Error::InvalidTextureView(id))?, - usage: internal_use, - }); + hal_textures.push(tb); (res_index, 1) } Br::TextureViewArray(ref bindings_array) => { @@ -2269,26 +2342,17 @@ impl Device { let res_index = hal_textures.len(); for &id in bindings_array.iter() { - let view = used - .views - .add_single(&*texture_view_guard, id) - .ok_or(Error::InvalidTextureView(id))?; - let (pub_usage, internal_use) = - self.texture_use_parameters(binding, decl, view, - "SampledTextureArray, ReadonlyStorageTextureArray or WriteonlyStorageTextureArray")?; - Self::create_texture_binding( - view, - internal_use, - pub_usage, + let tb = self.create_texture_binding( + binding, + decl, + &texture_view_guard, + id, &mut used, &mut used_texture_ranges, + &snatch_guard, )?; - hal_textures.push(hal::TextureBinding { - view: view - .raw(&snatch_guard) - .ok_or(Error::InvalidTextureView(id))?, - usage: internal_use, - }); + + hal_textures.push(tb); } (res_index, num_bindings) @@ -2762,8 +2826,10 @@ impl Device { label: desc.label.to_hal(self.instance_flags), layout: pipeline_layout.raw(), stage: hal::ProgrammableStage { - entry_point: final_entry_point_name.as_ref(), module: shader_module.raw(), + entry_point: final_entry_point_name.as_ref(), + constants: desc.stage.constants.as_ref(), + 
zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory, }, }; @@ -3178,6 +3244,8 @@ impl Device { hal::ProgrammableStage { module: vertex_shader_module.raw(), entry_point: &vertex_entry_point_name, + constants: stage_desc.constants.as_ref(), + zero_initialize_workgroup_memory: stage_desc.zero_initialize_workgroup_memory, } }; @@ -3237,6 +3305,10 @@ impl Device { Some(hal::ProgrammableStage { module: shader_module.raw(), entry_point: &fragment_entry_point_name, + constants: fragment_state.stage.constants.as_ref(), + zero_initialize_workgroup_memory: fragment_state + .stage + .zero_initialize_workgroup_memory, }) } None => None, @@ -3482,10 +3554,9 @@ impl Device { .map_err(DeviceError::from)? }; drop(guard); - let closures = self.lock_life().triage_submissions( - submission_index, - self.command_allocator.lock().as_mut().unwrap(), - ); + let closures = self + .lock_life() + .triage_submissions(submission_index, &self.command_allocator); assert!( closures.is_empty(), "wait_for_submit is not expected to work with closures" @@ -3613,10 +3684,7 @@ impl Device { log::error!("failed to wait for the device: {error}"); } let mut life_tracker = self.lock_life(); - let _ = life_tracker.triage_submissions( - current_index, - self.command_allocator.lock().as_mut().unwrap(), - ); + let _ = life_tracker.triage_submissions(current_index, &self.command_allocator); if let Some(device_lost_closure) = life_tracker.device_lost_closure.take() { // It's important to not hold the lock while calling the closure. drop(life_tracker); diff --git a/third_party/rust/wgpu-core/src/global.rs b/third_party/rust/wgpu-core/src/global.rs index 9a8c43bf33..6f6756a88c 100644 --- a/third_party/rust/wgpu-core/src/global.rs +++ b/third_party/rust/wgpu-core/src/global.rs @@ -45,7 +45,7 @@ impl GlobalReport { pub struct Global { pub instance: Instance, - pub surfaces: Registry, + pub(crate) surfaces: Registry, pub(crate) hubs: Hubs, } @@ -155,11 +155,9 @@ impl Drop for Global { // destroy surfaces for element in surfaces_locked.map.drain(..) 
{ if let Element::Occupied(arc_surface, _) = element { - if let Some(surface) = Arc::into_inner(arc_surface) { - self.instance.destroy_surface(surface); - } else { - panic!("Surface cannot be destroyed because is still in use"); - } + let surface = Arc::into_inner(arc_surface) + .expect("Surface cannot be destroyed because is still in use"); + self.instance.destroy_surface(surface); } } } diff --git a/third_party/rust/wgpu-core/src/hal_api.rs b/third_party/rust/wgpu-core/src/hal_api.rs index 179024baed..f1a40b1cff 100644 --- a/third_party/rust/wgpu-core/src/hal_api.rs +++ b/third_party/rust/wgpu-core/src/hal_api.rs @@ -11,7 +11,7 @@ pub trait HalApi: hal::Api + 'static + WasmNotSendSync { fn create_instance_from_hal(name: &str, hal_instance: Self::Instance) -> Instance; fn instance_as_hal(instance: &Instance) -> Option<&Self::Instance>; fn hub(global: &Global) -> &Hub; - fn get_surface(surface: &Surface) -> Option<&Self::Surface>; + fn surface_as_hal(surface: &Surface) -> Option<&Self::Surface>; } impl HalApi for hal::api::Empty { @@ -25,7 +25,7 @@ impl HalApi for hal::api::Empty { fn hub(_: &Global) -> &Hub { unimplemented!("called empty api") } - fn get_surface(_: &Surface) -> Option<&Self::Surface> { + fn surface_as_hal(_: &Surface) -> Option<&Self::Surface> { unimplemented!("called empty api") } } @@ -46,8 +46,8 @@ impl HalApi for hal::api::Vulkan { fn hub(global: &Global) -> &Hub { &global.hubs.vulkan } - fn get_surface(surface: &Surface) -> Option<&Self::Surface> { - surface.raw.downcast_ref::() + fn surface_as_hal(surface: &Surface) -> Option<&Self::Surface> { + surface.vulkan.as_ref() } } @@ -67,8 +67,8 @@ impl HalApi for hal::api::Metal { fn hub(global: &Global) -> &Hub { &global.hubs.metal } - fn get_surface(surface: &Surface) -> Option<&Self::Surface> { - surface.raw.downcast_ref::() + fn surface_as_hal(surface: &Surface) -> Option<&Self::Surface> { + surface.metal.as_ref() } } @@ -88,8 +88,8 @@ impl HalApi for hal::api::Dx12 { fn hub(global: &Global) -> &Hub { &global.hubs.dx12 } - fn get_surface(surface: &Surface) -> Option<&Self::Surface> { - surface.raw.downcast_ref::() + fn surface_as_hal(surface: &Surface) -> Option<&Self::Surface> { + surface.dx12.as_ref() } } @@ -110,7 +110,7 @@ impl HalApi for hal::api::Gles { fn hub(global: &Global) -> &Hub { &global.hubs.gl } - fn get_surface(surface: &Surface) -> Option<&Self::Surface> { - surface.raw.downcast_ref::() + fn surface_as_hal(surface: &Surface) -> Option<&Self::Surface> { + surface.gl.as_ref() } } diff --git a/third_party/rust/wgpu-core/src/hub.rs b/third_party/rust/wgpu-core/src/hub.rs index 0f4589c8b3..eb57411d98 100644 --- a/third_party/rust/wgpu-core/src/hub.rs +++ b/third_party/rust/wgpu-core/src/hub.rs @@ -169,23 +169,23 @@ impl HubReport { /// /// [`A::hub(global)`]: HalApi::hub pub struct Hub { - pub adapters: Registry>, - pub devices: Registry>, - pub queues: Registry>, - pub pipeline_layouts: Registry>, - pub shader_modules: Registry>, - pub bind_group_layouts: Registry>, - pub bind_groups: Registry>, - pub command_buffers: Registry>, - pub render_bundles: Registry>, - pub render_pipelines: Registry>, - pub compute_pipelines: Registry>, - pub query_sets: Registry>, - pub buffers: Registry>, - pub staging_buffers: Registry>, - pub textures: Registry>, - pub texture_views: Registry>, - pub samplers: Registry>, + pub(crate) adapters: Registry>, + pub(crate) devices: Registry>, + pub(crate) queues: Registry>, + pub(crate) pipeline_layouts: Registry>, + pub(crate) shader_modules: Registry>, + pub(crate) 
bind_group_layouts: Registry>, + pub(crate) bind_groups: Registry>, + pub(crate) command_buffers: Registry>, + pub(crate) render_bundles: Registry>, + pub(crate) render_pipelines: Registry>, + pub(crate) compute_pipelines: Registry>, + pub(crate) query_sets: Registry>, + pub(crate) buffers: Registry>, + pub(crate) staging_buffers: Registry>, + pub(crate) textures: Registry>, + pub(crate) texture_views: Registry>, + pub(crate) samplers: Registry>, } impl Hub { @@ -241,7 +241,7 @@ impl Hub { if let Element::Occupied(ref surface, _epoch) = *element { if let Some(ref mut present) = surface.presentation.lock().take() { if let Some(device) = present.device.downcast_ref::() { - let suf = A::get_surface(surface); + let suf = A::surface_as_hal(surface); unsafe { suf.unwrap().unconfigure(device.raw()); //TODO: we could destroy the surface here diff --git a/third_party/rust/wgpu-core/src/id.rs b/third_party/rust/wgpu-core/src/id.rs index 72b74218d0..1fa89f2bf0 100644 --- a/third_party/rust/wgpu-core/src/id.rs +++ b/third_party/rust/wgpu-core/src/id.rs @@ -91,8 +91,7 @@ pub fn as_option_slice(ids: &[Id]) -> &[Option>] { /// An identifier for a wgpu object. /// -/// An `Id` value identifies a value stored in a [`Global`]'s [`Hub`]'s [`Storage`]. -/// `Storage` implements [`Index`] and [`IndexMut`], accepting `Id` values as indices. +/// An `Id` value identifies a value stored in a [`Global`]'s [`Hub`]. /// /// ## Note on `Id` typing /// @@ -112,10 +111,7 @@ pub fn as_option_slice(ids: &[Id]) -> &[Option>] { /// [`Global`]: crate::global::Global /// [`Hub`]: crate::hub::Hub /// [`Hub`]: crate::hub::Hub -/// [`Storage`]: crate::storage::Storage /// [`Texture`]: crate::resource::Texture -/// [`Index`]: std::ops::Index -/// [`IndexMut`]: std::ops::IndexMut /// [`Registry`]: crate::hub::Registry /// [`Empty`]: hal::api::Empty #[repr(transparent)] @@ -182,15 +178,6 @@ where self.0.backend() } - /// Transmute this identifier to one with a different marker trait. - /// - /// Legal use is governed through a sealed trait, however it's correctness - /// depends on the current implementation of `wgpu-core`. - #[inline] - pub const fn transmute>(self) -> Id { - Id(self.0, PhantomData) - } - #[inline] pub fn zip(index: Index, epoch: Epoch, backend: Backend) -> Self { Id(RawId::zip(index, epoch, backend), PhantomData) @@ -202,20 +189,6 @@ where } } -pub(crate) mod transmute { - // This trait is effectively sealed to prevent illegal transmutes. - pub trait Transmute: super::Marker {} - - // Self-transmute is always legal. - impl Transmute for T where T: super::Marker {} - - // TODO: Remove these once queues have their own identifiers. - impl Transmute for super::markers::Device {} - impl Transmute for super::markers::Queue {} - impl Transmute for super::markers::CommandEncoder {} - impl Transmute for super::markers::CommandBuffer {} -} - impl Copy for Id where T: Marker {} impl Clone for Id @@ -349,6 +322,24 @@ ids! 
{ pub type QuerySetId QuerySet; } +impl CommandEncoderId { + pub fn into_command_buffer_id(self) -> CommandBufferId { + Id(self.0, PhantomData) + } +} + +impl CommandBufferId { + pub fn into_command_encoder_id(self) -> CommandEncoderId { + Id(self.0, PhantomData) + } +} + +impl DeviceId { + pub fn into_queue_id(self) -> QueueId { + Id(self.0, PhantomData) + } +} + #[test] fn test_id_backend() { for &b in &[ diff --git a/third_party/rust/wgpu-core/src/identity.rs b/third_party/rust/wgpu-core/src/identity.rs index d76d29341a..c89731f7af 100644 --- a/third_party/rust/wgpu-core/src/identity.rs +++ b/third_party/rust/wgpu-core/src/identity.rs @@ -1,8 +1,8 @@ -use parking_lot::Mutex; use wgt::Backend; use crate::{ id::{Id, Marker}, + lock::{rank, Mutex}, Epoch, Index, }; use std::{fmt::Debug, marker::PhantomData}; @@ -16,31 +16,26 @@ enum IdSource { /// A simple structure to allocate [`Id`] identifiers. /// -/// Calling [`alloc`] returns a fresh, never-before-seen id. Calling [`free`] +/// Calling [`alloc`] returns a fresh, never-before-seen id. Calling [`release`] /// marks an id as dead; it will never be returned again by `alloc`. /// -/// Use `IdentityManager::default` to construct new instances. +/// `IdentityValues` returns `Id`s whose index values are suitable for use as +/// indices into a `Vec` that holds those ids' referents: /// -/// `IdentityManager` returns `Id`s whose index values are suitable for use as -/// indices into a `Storage` that holds those ids' referents: +/// - Every live id has a distinct index value. Every live id's index +/// selects a distinct element in the vector. /// -/// - Every live id has a distinct index value. Each live id's index selects a -/// distinct element in the vector. -/// -/// - `IdentityManager` prefers low index numbers. If you size your vector to +/// - `IdentityValues` prefers low index numbers. If you size your vector to /// accommodate the indices produced here, the vector's length will reflect /// the highwater mark of actual occupancy. /// -/// - `IdentityManager` reuses the index values of freed ids before returning +/// - `IdentityValues` reuses the index values of freed ids before returning /// ids with new index values. Freed vector entries get reused. /// -/// See the module-level documentation for an overview of how this -/// fits together. 
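
The documentation above states what the id allocator guarantees: every live id has a distinct index, low indices are preferred, and freed indices are reused. A reduced standalone sketch of an allocator with those properties follows; the names are illustrative stand-ins, not the wgpu-core types. Pairing each index with an epoch that is bumped on reuse is what lets stale ids be rejected.

    // Illustrative stand-ins for wgpu-core's Index/Epoch aliases.
    type Index = u32;
    type Epoch = u32;

    #[derive(Default)]
    struct SimpleIdAllocator {
        free: Vec<(Index, Epoch)>, // freed slots, ready for reuse with a bumped epoch
        next_index: Index,         // high-water mark of indices ever handed out
    }

    impl SimpleIdAllocator {
        /// Return a fresh (index, epoch) pair, preferring recycled low indices.
        fn alloc(&mut self) -> (Index, Epoch) {
            if let Some((index, epoch)) = self.free.pop() {
                (index, epoch + 1) // same slot, new generation
            } else {
                let index = self.next_index;
                self.next_index += 1;
                (index, 1)
            }
        }

        /// Mark an id as dead; its index may be handed out again later.
        fn release(&mut self, index: Index, epoch: Epoch) {
            self.free.push((index, epoch));
        }
    }

    fn main() {
        let mut ids = SimpleIdAllocator::default();
        let a = ids.alloc(); // (0, 1)
        let b = ids.alloc(); // (1, 1)
        ids.release(a.0, a.1);
        let c = ids.alloc(); // (0, 2): index reused, epoch bumped
        println!("{a:?} {b:?} {c:?}");
    }
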
-/// /// [`Id`]: crate::id::Id /// [`Backend`]: wgt::Backend; -/// [`alloc`]: IdentityManager::alloc -/// [`free`]: IdentityManager::free +/// [`alloc`]: IdentityValues::alloc +/// [`release`]: IdentityValues::release #[derive(Debug)] pub(super) struct IdentityValues { free: Vec<(Index, Epoch)>, @@ -122,12 +117,15 @@ impl IdentityManager { impl IdentityManager { pub fn new() -> Self { Self { - values: Mutex::new(IdentityValues { - free: Vec::new(), - next_index: 0, - count: 0, - id_source: IdSource::None, - }), + values: Mutex::new( + rank::IDENTITY_MANAGER_VALUES, + IdentityValues { + free: Vec::new(), + next_index: 0, + count: 0, + id_source: IdSource::None, + }, + ), _phantom: PhantomData, } } diff --git a/third_party/rust/wgpu-core/src/instance.rs b/third_party/rust/wgpu-core/src/instance.rs index b909245fac..f0a3890c1e 100644 --- a/third_party/rust/wgpu-core/src/instance.rs +++ b/third_party/rust/wgpu-core/src/instance.rs @@ -1,19 +1,19 @@ +use std::collections::HashMap; use std::sync::Arc; use crate::{ - any_surface::AnySurface, api_log, device::{queue::Queue, resource::Device, DeviceDescriptor}, global::Global, hal_api::HalApi, id::markers, id::{AdapterId, DeviceId, Id, Marker, QueueId, SurfaceId}, + lock::{rank, Mutex}, present::Presentation, resource::{Resource, ResourceInfo, ResourceType}, resource_log, LabelHelpers, DOWNLEVEL_WARNING_MESSAGE, }; -use parking_lot::Mutex; use wgt::{Backend, Backends, PowerPreference}; use hal::{Adapter as _, Instance as _, OpenDevice}; @@ -21,6 +21,7 @@ use thiserror::Error; pub type RequestAdapterOptions = wgt::RequestAdapterOptions; type HalInstance = ::Instance; +type HalSurface = ::Surface; #[derive(Clone, Debug, Error)] #[error("Limit '{name}' value {requested} is better than allowed {allowed}")] @@ -113,31 +114,36 @@ impl Instance { } pub(crate) fn destroy_surface(&self, surface: Surface) { - fn destroy(instance: &Option, surface: AnySurface) { - unsafe { - if let Some(suf) = surface.take::() { - instance.as_ref().unwrap().destroy_surface(suf); + fn destroy(instance: &Option, mut surface: Option>) { + if let Some(surface) = surface.take() { + unsafe { + instance.as_ref().unwrap().destroy_surface(surface); } } } - match surface.raw.backend() { - #[cfg(vulkan)] - Backend::Vulkan => destroy::(&self.vulkan, surface.raw), - #[cfg(metal)] - Backend::Metal => destroy::(&self.metal, surface.raw), - #[cfg(dx12)] - Backend::Dx12 => destroy::(&self.dx12, surface.raw), - #[cfg(gles)] - Backend::Gl => destroy::(&self.gl, surface.raw), - _ => unreachable!(), - } + #[cfg(vulkan)] + destroy::(&self.vulkan, surface.vulkan); + #[cfg(metal)] + destroy::(&self.metal, surface.metal); + #[cfg(dx12)] + destroy::(&self.dx12, surface.dx12); + #[cfg(gles)] + destroy::(&self.gl, surface.gl); } } pub struct Surface { pub(crate) presentation: Mutex>, pub(crate) info: ResourceInfo, - pub(crate) raw: AnySurface, + + #[cfg(vulkan)] + pub vulkan: Option>, + #[cfg(metal)] + pub metal: Option>, + #[cfg(dx12)] + pub dx12: Option>, + #[cfg(gles)] + pub gl: Option>, } impl Resource for Surface { @@ -163,7 +169,7 @@ impl Surface { &self, adapter: &Adapter, ) -> Result { - let suf = A::get_surface(self).ok_or(GetSurfaceSupportError::Unsupported)?; + let suf = A::surface_as_hal(self).ok_or(GetSurfaceSupportError::Unsupported)?; profiling::scope!("surface_capabilities"); let caps = unsafe { adapter @@ -203,7 +209,7 @@ impl Adapter { } pub fn is_surface_supported(&self, surface: &Surface) -> bool { - let suf = A::get_surface(surface); + let suf = A::surface_as_hal(surface); // If 
get_surface returns None, then the API does not advertise support for the surface. // @@ -461,13 +467,25 @@ pub enum RequestAdapterError { #[derive(Clone, Debug, Error)] #[non_exhaustive] pub enum CreateSurfaceError { - #[error("No backend is available")] - NoSupportedBackend, - #[error(transparent)] - InstanceError(#[from] hal::InstanceError), + #[error("The backend {0} was not enabled on the instance.")] + BackendNotEnabled(Backend), + #[error("Failed to create surface for any enabled backend: {0:?}")] + FailedToCreateSurfaceForAnyBackend(HashMap), } impl Global { + /// Creates a new surface targeting the given display/window handles. + /// + /// Internally attempts to create hal surfaces for all enabled backends. + /// + /// Fails only if creation for surfaces for all enabled backends fails in which case + /// the error for each enabled backend is listed. + /// Vice versa, if creation for any backend succeeds, success is returned. + /// Surface creation errors are logged to the debug log in any case. + /// + /// id_in: + /// - If `Some`, the id to assign to the surface. A new one will be generated otherwise. + /// /// # Safety /// /// - `display_handle` must be a valid object to create a surface upon. @@ -483,50 +501,86 @@ impl Global { profiling::scope!("Instance::create_surface"); fn init( + errors: &mut HashMap, + any_created: &mut bool, + backend: Backend, inst: &Option, display_handle: raw_window_handle::RawDisplayHandle, window_handle: raw_window_handle::RawWindowHandle, - ) -> Option> { - inst.as_ref().map(|inst| unsafe { - match inst.create_surface(display_handle, window_handle) { - Ok(raw) => Ok(AnySurface::new::(raw)), - Err(e) => Err(e), + ) -> Option> { + inst.as_ref().and_then(|inst| { + match unsafe { inst.create_surface(display_handle, window_handle) } { + Ok(raw) => { + *any_created = true; + Some(raw) + } + Err(err) => { + log::debug!( + "Instance::create_surface: failed to create surface for {:?}: {:?}", + backend, + err + ); + errors.insert(backend, err); + None + } } }) } - let mut hal_surface: Option> = None; - - #[cfg(vulkan)] - if hal_surface.is_none() { - hal_surface = - init::(&self.instance.vulkan, display_handle, window_handle); - } - #[cfg(metal)] - if hal_surface.is_none() { - hal_surface = - init::(&self.instance.metal, display_handle, window_handle); - } - #[cfg(dx12)] - if hal_surface.is_none() { - hal_surface = - init::(&self.instance.dx12, display_handle, window_handle); - } - #[cfg(gles)] - if hal_surface.is_none() { - hal_surface = init::(&self.instance.gl, display_handle, window_handle); - } - - let hal_surface = hal_surface.ok_or(CreateSurfaceError::NoSupportedBackend)??; + let mut errors = HashMap::default(); + let mut any_created = false; let surface = Surface { - presentation: Mutex::new(None), + presentation: Mutex::new(rank::SURFACE_PRESENTATION, None), info: ResourceInfo::new("", None), - raw: hal_surface, + + #[cfg(vulkan)] + vulkan: init::( + &mut errors, + &mut any_created, + Backend::Vulkan, + &self.instance.vulkan, + display_handle, + window_handle, + ), + #[cfg(metal)] + metal: init::( + &mut errors, + &mut any_created, + Backend::Metal, + &self.instance.metal, + display_handle, + window_handle, + ), + #[cfg(dx12)] + dx12: init::( + &mut errors, + &mut any_created, + Backend::Dx12, + &self.instance.dx12, + display_handle, + window_handle, + ), + #[cfg(gles)] + gl: init::( + &mut errors, + &mut any_created, + Backend::Gl, + &self.instance.gl, + display_handle, + window_handle, + ), }; - let (id, _) = 
self.surfaces.prepare(id_in).assign(surface); - Ok(id) + if any_created { + #[allow(clippy::arc_with_non_send_sync)] + let (id, _) = self.surfaces.prepare(id_in).assign(Arc::new(surface)); + Ok(id) + } else { + Err(CreateSurfaceError::FailedToCreateSurfaceForAnyBackend( + errors, + )) + } } /// # Safety @@ -537,29 +591,57 @@ impl Global { &self, layer: *mut std::ffi::c_void, id_in: Option, - ) -> SurfaceId { + ) -> Result { profiling::scope!("Instance::create_surface_metal"); let surface = Surface { - presentation: Mutex::new(None), + presentation: Mutex::new(rank::SURFACE_PRESENTATION, None), info: ResourceInfo::new("", None), - raw: { - let hal_surface = self - .instance - .metal + metal: Some(self.instance.metal.as_ref().map_or( + Err(CreateSurfaceError::BackendNotEnabled(Backend::Metal)), + |inst| { + // we don't want to link to metal-rs for this + #[allow(clippy::transmute_ptr_to_ref)] + Ok(inst.create_surface_from_layer(unsafe { std::mem::transmute(layer) })) + }, + )?), + #[cfg(dx12)] + dx12: None, + #[cfg(vulkan)] + vulkan: None, + #[cfg(gles)] + gl: None, + }; + + let (id, _) = self.surfaces.prepare(id_in).assign(Arc::new(surface)); + Ok(id) + } + + #[cfg(dx12)] + fn instance_create_surface_dx12( + &self, + id_in: Option, + create_surface_func: impl FnOnce(&HalInstance) -> HalSurface, + ) -> Result { + let surface = Surface { + presentation: Mutex::new(rank::SURFACE_PRESENTATION, None), + info: ResourceInfo::new("", None), + dx12: Some(create_surface_func( + self.instance + .dx12 .as_ref() - .map(|inst| { - // we don't want to link to metal-rs for this - #[allow(clippy::transmute_ptr_to_ref)] - inst.create_surface_from_layer(unsafe { std::mem::transmute(layer) }) - }) - .unwrap(); - AnySurface::new::(hal_surface) - }, + .ok_or(CreateSurfaceError::BackendNotEnabled(Backend::Dx12))?, + )), + #[cfg(metal)] + metal: None, + #[cfg(vulkan)] + vulkan: None, + #[cfg(gles)] + gl: None, }; - let (id, _) = self.surfaces.prepare(id_in).assign(surface); - id + let (id, _) = self.surfaces.prepare(id_in).assign(Arc::new(surface)); + Ok(id) } #[cfg(dx12)] @@ -570,25 +652,11 @@ impl Global { &self, visual: *mut std::ffi::c_void, id_in: Option, - ) -> SurfaceId { + ) -> Result { profiling::scope!("Instance::instance_create_surface_from_visual"); - - let surface = Surface { - presentation: Mutex::new(None), - info: ResourceInfo::new("", None), - raw: { - let hal_surface = self - .instance - .dx12 - .as_ref() - .map(|inst| unsafe { inst.create_surface_from_visual(visual as _) }) - .unwrap(); - AnySurface::new::(hal_surface) - }, - }; - - let (id, _) = self.surfaces.prepare(id_in).assign(surface); - id + self.instance_create_surface_dx12(id_in, |inst| unsafe { + inst.create_surface_from_visual(visual as _) + }) } #[cfg(dx12)] @@ -599,25 +667,11 @@ impl Global { &self, surface_handle: *mut std::ffi::c_void, id_in: Option, - ) -> SurfaceId { + ) -> Result { profiling::scope!("Instance::instance_create_surface_from_surface_handle"); - - let surface = Surface { - presentation: Mutex::new(None), - info: ResourceInfo::new("", None), - raw: { - let hal_surface = self - .instance - .dx12 - .as_ref() - .map(|inst| unsafe { inst.create_surface_from_surface_handle(surface_handle) }) - .unwrap(); - AnySurface::new::(hal_surface) - }, - }; - - let (id, _) = self.surfaces.prepare(id_in).assign(surface); - id + self.instance_create_surface_dx12(id_in, |inst| unsafe { + inst.create_surface_from_surface_handle(surface_handle) + }) } #[cfg(dx12)] @@ -628,27 +682,11 @@ impl Global { &self, swap_chain_panel: *mut 
std::ffi::c_void, id_in: Option, - ) -> SurfaceId { + ) -> Result { profiling::scope!("Instance::instance_create_surface_from_swap_chain_panel"); - - let surface = Surface { - presentation: Mutex::new(None), - info: ResourceInfo::new("", None), - raw: { - let hal_surface = self - .instance - .dx12 - .as_ref() - .map(|inst| unsafe { - inst.create_surface_from_swap_chain_panel(swap_chain_panel as _) - }) - .unwrap(); - AnySurface::new::(hal_surface) - }, - }; - - let (id, _) = self.surfaces.prepare(id_in).assign(surface); - id + self.instance_create_surface_dx12(id_in, |inst| unsafe { + inst.create_surface_from_swap_chain_panel(swap_chain_panel as _) + }) } pub fn surface_drop(&self, id: SurfaceId) { @@ -656,32 +694,34 @@ impl Global { api_log!("Surface::drop {id:?}"); - fn unconfigure(global: &Global, surface: &AnySurface, present: &Presentation) { - let hub = HalApi::hub(global); - if let Some(hal_surface) = surface.downcast_ref::() { + fn unconfigure( + global: &Global, + surface: &Option>, + present: &Presentation, + ) { + if let Some(surface) = surface { + let hub = HalApi::hub(global); if let Some(device) = present.device.downcast_ref::() { - hub.surface_unconfigure(device, hal_surface); + hub.surface_unconfigure(device, surface); } } } let surface = self.surfaces.unregister(id); - if let Some(surface) = Arc::into_inner(surface.unwrap()) { - if let Some(present) = surface.presentation.lock().take() { - #[cfg(vulkan)] - unconfigure::(self, &surface.raw, &present); - #[cfg(metal)] - unconfigure::(self, &surface.raw, &present); - #[cfg(dx12)] - unconfigure::(self, &surface.raw, &present); - #[cfg(gles)] - unconfigure::(self, &surface.raw, &present); - } + let surface = Arc::into_inner(surface.unwrap()) + .expect("Surface cannot be destroyed because is still in use"); - self.instance.destroy_surface(surface); - } else { - panic!("Surface cannot be destroyed because is still in use"); + if let Some(present) = surface.presentation.lock().take() { + #[cfg(vulkan)] + unconfigure::(self, &surface.vulkan, &present); + #[cfg(metal)] + unconfigure::(self, &surface.metal, &present); + #[cfg(dx12)] + unconfigure::(self, &surface.dx12, &present); + #[cfg(gles)] + unconfigure::(self, &surface.gl, &present); } + self.instance.destroy_surface(surface); } fn enumerate( @@ -707,7 +747,7 @@ impl Global { for raw in hal_adapters { let adapter = Adapter::new(raw); log::info!("Adapter {:?} {:?}", A::VARIANT, adapter.raw.info); - let (id, _) = hub.adapters.prepare(id_backend).assign(adapter); + let (id, _) = hub.adapters.prepare(id_backend).assign(Arc::new(adapter)); list.push(id); } } @@ -754,7 +794,10 @@ impl Global { None => { let adapter = Adapter::new(list.swap_remove(*selected)); log::info!("Adapter {:?} {:?}", A::VARIANT, adapter.raw.info); - let (id, _) = HalApi::hub(self).adapters.prepare(new_id).assign(adapter); + let (id, _) = HalApi::hub(self) + .adapters + .prepare(new_id) + .assign(Arc::new(adapter)); Some(id) } } @@ -784,7 +827,7 @@ impl Global { adapters.retain(|exposed| exposed.info.device_type == wgt::DeviceType::Cpu); } if let Some(surface) = compatible_surface { - let surface = &A::get_surface(surface); + let surface = &A::surface_as_hal(surface); adapters.retain(|exposed| unsafe { // If the surface does not exist for this backend, // then the surface is not supported. 
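
The surface changes above replace the type-erased `AnySurface` with one `Option` of a hal surface per backend, and surface creation now tries every enabled backend, recording per-backend errors and failing only if all of them fail. Below is a standalone sketch of that fallback pattern; the `Backend` and `CreateSurfaceError` items here are simplified stand-ins for the real types, and the per-backend constructors are modeled as plain closures.

    use std::collections::HashMap;

    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
    enum Backend { Vulkan, Metal, Dx12, Gl }

    #[derive(Debug)]
    enum CreateSurfaceError {
        FailedToCreateSurfaceForAnyBackend(HashMap<Backend, String>),
    }

    // Try every enabled backend; remember each failure and succeed if any
    // backend produced a surface.
    fn create_surface(
        enabled: &[(Backend, fn() -> Result<(), String>)],
    ) -> Result<Vec<Backend>, CreateSurfaceError> {
        let mut errors = HashMap::new();
        let mut created = Vec::new();
        for (backend, init) in enabled {
            match init() {
                Ok(()) => created.push(*backend),
                Err(err) => {
                    // A failure is only fatal if *every* backend fails.
                    errors.insert(*backend, err);
                }
            }
        }
        if created.is_empty() {
            Err(CreateSurfaceError::FailedToCreateSurfaceForAnyBackend(errors))
        } else {
            Ok(created)
        }
    }

    fn main() {
        let enabled: &[(Backend, fn() -> Result<(), String>)] = &[
            (Backend::Vulkan, || Err("no Vulkan loader".to_string())),
            (Backend::Gl, || Ok(())),
        ];
        println!("{:?}", create_surface(enabled));
    }
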
@@ -937,13 +980,13 @@ impl Global { let (id, _adapter): (_, Arc>) = match A::VARIANT { #[cfg(vulkan)] - Backend::Vulkan => fid.assign(Adapter::new(hal_adapter)), + Backend::Vulkan => fid.assign(Arc::new(Adapter::new(hal_adapter))), #[cfg(metal)] - Backend::Metal => fid.assign(Adapter::new(hal_adapter)), + Backend::Metal => fid.assign(Arc::new(Adapter::new(hal_adapter))), #[cfg(dx12)] - Backend::Dx12 => fid.assign(Adapter::new(hal_adapter)), + Backend::Dx12 => fid.assign(Arc::new(Adapter::new(hal_adapter))), #[cfg(gles)] - Backend::Gl => fid.assign(Adapter::new(hal_adapter)), + Backend::Gl => fid.assign(Arc::new(Adapter::new(hal_adapter))), _ => unreachable!(), }; resource_log!("Created Adapter {:?}", id); @@ -1066,13 +1109,13 @@ impl Global { Ok((device, queue)) => (device, queue), Err(e) => break e, }; - let (device_id, _) = device_fid.assign(device); + let (device_id, _) = device_fid.assign(Arc::new(device)); resource_log!("Created Device {:?}", device_id); let device = hub.devices.get(device_id).unwrap(); queue.device = Some(device.clone()); - let (queue_id, queue) = queue_fid.assign(queue); + let (queue_id, queue) = queue_fid.assign(Arc::new(queue)); resource_log!("Created Queue {:?}", queue_id); device.set_queue(queue); @@ -1118,13 +1161,13 @@ impl Global { Ok(device) => device, Err(e) => break e, }; - let (device_id, _) = devices_fid.assign(device); + let (device_id, _) = devices_fid.assign(Arc::new(device)); resource_log!("Created Device {:?}", device_id); let device = hub.devices.get(device_id).unwrap(); queue.device = Some(device.clone()); - let (queue_id, queue) = queues_fid.assign(queue); + let (queue_id, queue) = queues_fid.assign(Arc::new(queue)); resource_log!("Created Queue {:?}", queue_id); device.set_queue(queue); diff --git a/third_party/rust/wgpu-core/src/lib.rs b/third_party/rust/wgpu-core/src/lib.rs index 5454f0d682..032d85a4bc 100644 --- a/third_party/rust/wgpu-core/src/lib.rs +++ b/third_party/rust/wgpu-core/src/lib.rs @@ -39,6 +39,8 @@ unused_braces, // It gets in the way a lot and does not prevent bugs in practice. clippy::pattern_type_mismatch, + // `wgpu-core` isn't entirely user-facing, so it's useful to document internal items. + rustdoc::private_intra_doc_links )] #![warn( trivial_casts, @@ -48,7 +50,6 @@ unused_qualifications )] -pub mod any_surface; pub mod binding_model; pub mod command; mod conv; @@ -62,6 +63,7 @@ pub mod id; pub mod identity; mod init_tracker; pub mod instance; +mod lock; pub mod pipeline; mod pool; pub mod present; diff --git a/third_party/rust/wgpu-core/src/lock/mod.rs b/third_party/rust/wgpu-core/src/lock/mod.rs new file mode 100644 index 0000000000..a6593a062d --- /dev/null +++ b/third_party/rust/wgpu-core/src/lock/mod.rs @@ -0,0 +1,41 @@ +//! Instrumented lock types. +//! +//! This module defines a set of instrumented wrappers for the lock +//! types used in `wgpu-core` ([`Mutex`], [`RwLock`], and +//! [`SnatchLock`]) that help us understand and validate `wgpu-core` +//! synchronization. +//! +//! - The [`ranked`] module defines lock types that perform run-time +//! checks to ensure that each thread acquires locks only in a +//! specific order, to prevent deadlocks. +//! +//! - The [`vanilla`] module defines lock types that are +//! uninstrumented, no-overhead wrappers around the standard lock +//! types. +//! +//! (We plan to add more wrappers in the future.) +//! +//! If the `wgpu_validate_locks` config is set (for example, with +//! `RUSTFLAGS='--cfg wgpu_validate_locks'`), `wgpu-core` uses the +//! [`ranked`] module's locks. 
We hope to make this the default for +//! debug builds soon. +//! +//! Otherwise, `wgpu-core` uses the [`vanilla`] module's locks. +//! +//! [`Mutex`]: parking_lot::Mutex +//! [`RwLock`]: parking_lot::RwLock +//! [`SnatchLock`]: crate::snatch::SnatchLock + +pub mod rank; + +#[cfg_attr(not(wgpu_validate_locks), allow(dead_code))] +mod ranked; + +#[cfg_attr(wgpu_validate_locks, allow(dead_code))] +mod vanilla; + +#[cfg(wgpu_validate_locks)] +pub use ranked::{Mutex, MutexGuard, RwLock, RwLockReadGuard, RwLockWriteGuard}; + +#[cfg(not(wgpu_validate_locks))] +pub use vanilla::{Mutex, MutexGuard, RwLock, RwLockReadGuard, RwLockWriteGuard}; diff --git a/third_party/rust/wgpu-core/src/lock/rank.rs b/third_party/rust/wgpu-core/src/lock/rank.rs new file mode 100644 index 0000000000..4387b8d138 --- /dev/null +++ b/third_party/rust/wgpu-core/src/lock/rank.rs @@ -0,0 +1,170 @@ +//! Ranks for `wgpu-core` locks, restricting acquisition order. +//! +//! See [`LockRank`]. + +/// The rank of a lock. +/// +/// Each [`Mutex`], [`RwLock`], and [`SnatchLock`] in `wgpu-core` has been +/// assigned a *rank*: a node in the DAG defined at the bottom of +/// `wgpu-core/src/lock/rank.rs`. The rank of the most recently +/// acquired lock you are still holding determines which locks you may +/// attempt to acquire next. +/// +/// When you create a lock in `wgpu-core`, you must specify its rank +/// by passing in a [`LockRank`] value. This module declares a +/// pre-defined set of ranks to cover everything in `wgpu-core`, named +/// after the type in which they occur, and the name of the type's +/// field that is a lock. For example, [`CommandBuffer::data`] is a +/// `Mutex`, and its rank here is the constant +/// [`COMMAND_BUFFER_DATA`]. +/// +/// [`Mutex`]: parking_lot::Mutex +/// [`RwLock`]: parking_lot::RwLock +/// [`SnatchLock`]: crate::snatch::SnatchLock +/// [`CommandBuffer::data`]: crate::command::CommandBuffer::data +#[derive(Debug, Copy, Clone)] +pub struct LockRank { + /// The bit representing this lock. + /// + /// There should only be a single bit set in this value. + pub(super) bit: LockRankSet, + + /// A bitmask of permitted successor ranks. + /// + /// If `rank` is the rank of the most recently acquired lock we + /// are still holding, then `rank.followers` is the mask of + /// locks we are allowed to acquire next. + /// + /// The `define_lock_ranks!` macro ensures that there are no + /// cycles in the graph of lock ranks and their followers. + pub(super) followers: LockRankSet, +} + +/// Define a set of lock ranks, and each rank's permitted successors. +macro_rules! define_lock_ranks { + { + $( + $( #[ $attr:meta ] )* + rank $name:ident $member:literal followed by { $( $follower:ident ),* $(,)? } + )* + } => { + // An enum that assigns a unique number to each rank. + #[allow(non_camel_case_types, clippy::upper_case_acronyms)] + enum LockRankNumber { $( $name, )* } + + bitflags::bitflags! { + #[derive(Debug, Copy, Clone, Eq, PartialEq)] + /// A bitflags type representing a set of lock ranks. + pub struct LockRankSet: u64 { + $( + const $name = 1 << (LockRankNumber:: $name as u64); + )* + } + } + + impl LockRankSet { + pub fn name(self) -> &'static str { + match self { + $( + LockRankSet:: $name => $member, + )* + _ => "", + } + } + } + + $( + // If there is any cycle in the ranking, the initializers + // for `followers` will be cyclic, and rustc will give us + // an error message explaining the cycle. 
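
As the comment above notes, the cycle check falls out of the constant initializers themselves. A hand-expanded standalone sketch of what the macro generates for two ranks makes this concrete; the `Rank` struct and bit values here are illustrative, not the generated `LockRank`/`LockRankSet` items. If the declarations formed a cycle, the `followers` constants would reference each other cyclically and rustc would reject the program.

    #[derive(Clone, Copy)]
    struct Rank {
        bit: u64,
        followers: u64,
    }

    const ROOK: Rank = Rank { bit: 1 << 1, followers: 0 };
    const PAWN: Rank = Rank {
        bit: 1 << 0,
        followers: 0 | ROOK.bit, // PAWN may be followed by ROOK, but not vice versa
    };

    fn may_follow(held: Rank, next: Rank) -> bool {
        held.followers & next.bit != 0
    }

    fn main() {
        assert!(may_follow(PAWN, ROOK));
        assert!(!may_follow(ROOK, PAWN));
        println!("acyclic rank graph ok");
    }
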
+ $( #[ $attr ] )* + pub const $name: LockRank = LockRank { + bit: LockRankSet:: $name, + followers: LockRankSet::empty() $( .union($follower.bit) )*, + }; + )* + } +} + +define_lock_ranks! { + rank DEVICE_TEMP_SUSPECTED "Device::temp_suspected" followed by { + SHARED_TRACKER_INDEX_ALLOCATOR_INNER, + COMMAND_BUFFER_DATA, + DEVICE_TRACKERS, + } + rank COMMAND_BUFFER_DATA "CommandBuffer::data" followed by { + DEVICE_SNATCHABLE_LOCK, + DEVICE_USAGE_SCOPES, + SHARED_TRACKER_INDEX_ALLOCATOR_INNER, + BUFFER_BIND_GROUP_STATE_BUFFERS, + TEXTURE_BIND_GROUP_STATE_TEXTURES, + BUFFER_MAP_STATE, + STATELESS_BIND_GROUP_STATE_RESOURCES, + } + rank DEVICE_SNATCHABLE_LOCK "Device::snatchable_lock" followed by { + SHARED_TRACKER_INDEX_ALLOCATOR_INNER, + DEVICE_TRACE, + BUFFER_MAP_STATE, + BUFFER_BIND_GROUP_STATE_BUFFERS, + TEXTURE_BIND_GROUP_STATE_TEXTURES, + STATELESS_BIND_GROUP_STATE_RESOURCES, + // Uncomment this to see an interesting cycle. + // COMMAND_BUFFER_DATA, + } + rank BUFFER_MAP_STATE "Buffer::map_state" followed by { + DEVICE_PENDING_WRITES, + SHARED_TRACKER_INDEX_ALLOCATOR_INNER, + DEVICE_TRACE, + } + rank DEVICE_PENDING_WRITES "Device::pending_writes" followed by { + COMMAND_ALLOCATOR_FREE_ENCODERS, + SHARED_TRACKER_INDEX_ALLOCATOR_INNER, + DEVICE_LIFE_TRACKER, + } + rank DEVICE_LIFE_TRACKER "Device::life_tracker" followed by { + COMMAND_ALLOCATOR_FREE_ENCODERS, + // Uncomment this to see an interesting cycle. + // DEVICE_TEMP_SUSPECTED, + DEVICE_TRACE, + } + rank COMMAND_ALLOCATOR_FREE_ENCODERS "CommandAllocator::free_encoders" followed by { + SHARED_TRACKER_INDEX_ALLOCATOR_INNER, + } + + rank BUFFER_BIND_GROUPS "Buffer::bind_groups" followed by { } + rank BUFFER_BIND_GROUP_STATE_BUFFERS "BufferBindGroupState::buffers" followed by { } + rank BUFFER_INITIALIZATION_STATUS "Buffer::initialization_status" followed by { } + rank BUFFER_SYNC_MAPPED_WRITES "Buffer::sync_mapped_writes" followed by { } + rank DEVICE_DEFERRED_DESTROY "Device::deferred_destroy" followed by { } + rank DEVICE_FENCE "Device::fence" followed by { } + #[allow(dead_code)] + rank DEVICE_TRACE "Device::trace" followed by { } + rank DEVICE_TRACKERS "Device::trackers" followed by { } + rank DEVICE_USAGE_SCOPES "Device::usage_scopes" followed by { } + rank IDENTITY_MANAGER_VALUES "IdentityManager::values" followed by { } + rank REGISTRY_STORAGE "Registry::storage" followed by { } + rank RENDER_BUNDLE_SCOPE_BUFFERS "RenderBundleScope::buffers" followed by { } + rank RENDER_BUNDLE_SCOPE_TEXTURES "RenderBundleScope::textures" followed by { } + rank RENDER_BUNDLE_SCOPE_BIND_GROUPS "RenderBundleScope::bind_groups" followed by { } + rank RENDER_BUNDLE_SCOPE_RENDER_PIPELINES "RenderBundleScope::render_pipelines" followed by { } + rank RENDER_BUNDLE_SCOPE_QUERY_SETS "RenderBundleScope::query_sets" followed by { } + rank RESOURCE_POOL_INNER "ResourcePool::inner" followed by { } + rank SHARED_TRACKER_INDEX_ALLOCATOR_INNER "SharedTrackerIndexAllocator::inner" followed by { } + rank STAGING_BUFFER_RAW "StagingBuffer::raw" followed by { } + rank STATELESS_BIND_GROUP_STATE_RESOURCES "StatelessBindGroupState::resources" followed by { } + rank SURFACE_PRESENTATION "Surface::presentation" followed by { } + rank TEXTURE_BIND_GROUPS "Texture::bind_groups" followed by { } + rank TEXTURE_BIND_GROUP_STATE_TEXTURES "TextureBindGroupState::textures" followed by { } + rank TEXTURE_INITIALIZATION_STATUS "Texture::initialization_status" followed by { } + rank TEXTURE_CLEAR_MODE "Texture::clear_mode" followed by { } + rank TEXTURE_VIEWS "Texture::views" 
followed by { } + + #[cfg(test)] + rank PAWN "pawn" followed by { ROOK, BISHOP } + #[cfg(test)] + rank ROOK "rook" followed by { KNIGHT } + #[cfg(test)] + rank KNIGHT "knight" followed by { } + #[cfg(test)] + rank BISHOP "bishop" followed by { } +} diff --git a/third_party/rust/wgpu-core/src/lock/ranked.rs b/third_party/rust/wgpu-core/src/lock/ranked.rs new file mode 100644 index 0000000000..4237116c2c --- /dev/null +++ b/third_party/rust/wgpu-core/src/lock/ranked.rs @@ -0,0 +1,397 @@ +//! Lock types that enforce well-ranked lock acquisition order. +//! +//! This module's [`Mutex`] and [`RwLock` types are instrumented to check that +//! `wgpu-core` acquires locks according to their rank, to prevent deadlocks. To +//! use it, put `--cfg wgpu_validate_locks` in `RUSTFLAGS`. +//! +//! The [`LockRank`] constants in the [`lock::rank`] module describe edges in a +//! directed graph of lock acquisitions: each lock's rank says, if this is the most +//! recently acquired lock that you are still holding, then these are the locks you +//! are allowed to acquire next. +//! +//! As long as this graph doesn't have cycles, any number of threads can acquire +//! locks along paths through the graph without deadlock: +//! +//! - Assume that if a thread is holding a lock, then it will either release it, +//! or block trying to acquire another one. No thread just sits on its locks +//! forever for unrelated reasons. If it did, then that would be a source of +//! deadlock "outside the system" that we can't do anything about. +//! +//! - This module asserts that threads acquire and release locks in a stack-like +//! order: a lock is dropped only when it is the *most recently acquired* lock +//! *still held* - call this the "youngest" lock. This stack-like ordering +//! isn't a Rust requirement; Rust lets you drop guards in any order you like. +//! This is a restriction we impose. +//! +//! - Consider the directed graph whose nodes are locks, and whose edges go from +//! each lock to its permitted followers, the locks in its [`LockRank::followers`] +//! set. The definition of the [`lock::rank`] module's [`LockRank`] constants +//! ensures that this graph has no cycles, including trivial cycles from a node to +//! itself. +//! +//! - This module then asserts that each thread attempts to acquire a lock only if +//! it is among its youngest lock's permitted followers. Thus, as a thread +//! acquires locks, it must be traversing a path through the graph along its +//! edges. +//! +//! - Because there are no cycles in the graph, whenever one thread is blocked +//! waiting to acquire a lock, that lock must be held by a different thread: if +//! you were allowed to acquire a lock you already hold, that would be a cycle in +//! the graph. +//! +//! - Furthermore, because the graph has no cycles, as we work our way from each +//! thread to the thread it is blocked waiting for, we must eventually reach an +//! end point: there must be some thread that is able to acquire its next lock, or +//! that is about to release a lock. +//! +//! Thus, the system as a whole is always able to make progress: it is free of +//! deadlocks. +//! +//! Note that this validation only monitors each thread's behavior in isolation: +//! there's only thread-local state, nothing communicated between threads. So we +//! don't detect deadlocks, per se, only the potential to cause deadlocks. This +//! means that the validation is conservative, but more reproducible, since it's not +//! dependent on any particular interleaving of execution. +//! +//! 
[`lock::rank`]: crate::lock::rank + +use super::rank::LockRank; +use std::{cell::Cell, panic::Location}; + +/// A `Mutex` instrumented for deadlock prevention. +/// +/// This is just a wrapper around a [`parking_lot::Mutex`], along with +/// its rank in the `wgpu_core` lock ordering. +/// +/// For details, see [the module documentation][mod]. +/// +/// [mod]: crate::lock::ranked +pub struct Mutex { + inner: parking_lot::Mutex, + rank: LockRank, +} + +/// A guard produced by locking [`Mutex`]. +/// +/// This is just a wrapper around a [`parking_lot::MutexGuard`], along +/// with the state needed to track lock acquisition. +/// +/// For details, see [the module documentation][mod]. +/// +/// [mod]: crate::lock::ranked +pub struct MutexGuard<'a, T> { + inner: parking_lot::MutexGuard<'a, T>, + saved: LockStateGuard, +} + +thread_local! { + static LOCK_STATE: Cell = const { Cell::new(LockState::INITIAL) }; +} + +/// Per-thread state for the deadlock checker. +#[derive(Debug, Copy, Clone)] +struct LockState { + /// The last lock we acquired, and where. + last_acquired: Option<(LockRank, &'static Location<'static>)>, + + /// The number of locks currently held. + /// + /// This is used to enforce stack-like lock acquisition and release. + depth: u32, +} + +impl LockState { + const INITIAL: LockState = LockState { + last_acquired: None, + depth: 0, + }; +} + +/// A container that restores a [`LockState`] when dropped. +/// +/// This type serves two purposes: +/// +/// - Operations like `RwLockWriteGuard::downgrade` would like to be able to +/// destructure lock guards and reassemble their pieces into new guards, but +/// if the guard type itself implements `Drop`, we can't destructure it +/// without unsafe code or pointless `Option`s whose state is almost always +/// statically known. +/// +/// - We can just implement `Drop` for this type once, and then use it in lock +/// guards, rather than implementing `Drop` separately for each guard type. +struct LockStateGuard(LockState); + +impl Drop for LockStateGuard { + fn drop(&mut self) { + release(self.0) + } +} + +/// Check and record the acquisition of a lock with `new_rank`. +/// +/// Check that acquiring a lock with `new_rank` is permitted at this point, and +/// update the per-thread state accordingly. +/// +/// Return the `LockState` that must be restored when this thread is released. +fn acquire(new_rank: LockRank, location: &'static Location<'static>) -> LockState { + let state = LOCK_STATE.get(); + // Initially, it's fine to acquire any lock. So we only + // need to check when `last_acquired` is `Some`. + if let Some((ref last_rank, ref last_location)) = state.last_acquired { + assert!( + last_rank.followers.contains(new_rank.bit), + "Attempt to acquire nested mutexes in wrong order:\n\ + last locked {:<35} at {}\n\ + now locking {:<35} at {}\n\ + Locking {} after locking {} is not permitted.", + last_rank.bit.name(), + last_location, + new_rank.bit.name(), + location, + new_rank.bit.name(), + last_rank.bit.name(), + ); + } + LOCK_STATE.set(LockState { + last_acquired: Some((new_rank, location)), + depth: state.depth + 1, + }); + state +} + +/// Record the release of a lock whose saved state was `saved`. +/// +/// Check that locks are being acquired in stacking order, and update the +/// per-thread state accordingly. 
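
Since the validation is purely thread-local, the whole mechanism can be modeled in a few lines. The standalone sketch below keeps only a depth counter and a guard that restores it on drop, which is enough to show how stack-like release order is enforced; the real code additionally records the last acquired rank and its source location.

    use std::cell::Cell;

    thread_local! {
        // Per-thread count of locks currently held, mirroring LockState::depth.
        static DEPTH: Cell<u32> = const { Cell::new(0) };
    }

    /// Restores the saved depth when dropped, enforcing stack-like release order.
    struct DepthGuard {
        saved: u32,
    }

    fn acquire() -> DepthGuard {
        let saved = DEPTH.get();
        DEPTH.set(saved + 1);
        DepthGuard { saved }
    }

    impl Drop for DepthGuard {
        fn drop(&mut self) {
            // The most recently acquired "lock" must be released first.
            assert_eq!(DEPTH.get(), self.saved + 1, "Lock not released in stacking order");
            DEPTH.set(self.saved);
        }
    }

    fn main() {
        let outer = acquire();
        {
            let _inner = acquire();
        } // inner released first: ok
        drop(outer);
        println!("stack-like order respected");
    }
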
+fn release(saved: LockState) { + let prior = LOCK_STATE.replace(saved); + + // Although Rust allows mutex guards to be dropped in any + // order, this analysis requires that locks be acquired and + // released in stack order: the next lock to be released must be + // the most recently acquired lock still held. + assert_eq!( + prior.depth, + saved.depth + 1, + "Lock not released in stacking order" + ); +} + +impl Mutex { + pub fn new(rank: LockRank, value: T) -> Mutex { + Mutex { + inner: parking_lot::Mutex::new(value), + rank, + } + } + + #[track_caller] + pub fn lock(&self) -> MutexGuard { + let saved = acquire(self.rank, Location::caller()); + MutexGuard { + inner: self.inner.lock(), + saved: LockStateGuard(saved), + } + } +} + +impl<'a, T> std::ops::Deref for MutexGuard<'a, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + self.inner.deref() + } +} + +impl<'a, T> std::ops::DerefMut for MutexGuard<'a, T> { + fn deref_mut(&mut self) -> &mut Self::Target { + self.inner.deref_mut() + } +} + +impl std::fmt::Debug for Mutex { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.inner.fmt(f) + } +} + +/// An `RwLock` instrumented for deadlock prevention. +/// +/// This is just a wrapper around a [`parking_lot::RwLock`], along with +/// its rank in the `wgpu_core` lock ordering. +/// +/// For details, see [the module documentation][mod]. +/// +/// [mod]: crate::lock::ranked +pub struct RwLock { + inner: parking_lot::RwLock, + rank: LockRank, +} + +/// A read guard produced by locking [`RwLock`] for reading. +/// +/// This is just a wrapper around a [`parking_lot::RwLockReadGuard`], along with +/// the state needed to track lock acquisition. +/// +/// For details, see [the module documentation][mod]. +/// +/// [mod]: crate::lock::ranked +pub struct RwLockReadGuard<'a, T> { + inner: parking_lot::RwLockReadGuard<'a, T>, + saved: LockStateGuard, +} + +/// A write guard produced by locking [`RwLock`] for writing. +/// +/// This is just a wrapper around a [`parking_lot::RwLockWriteGuard`], along +/// with the state needed to track lock acquisition. +/// +/// For details, see [the module documentation][mod]. 
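
One reason the saved state lives in a separate `LockStateGuard` rather than in the guard types themselves is `downgrade`: parking_lot can turn a write guard into a read guard, so the wrapper must move its bookkeeping from one guard type to the other without running `Drop` in between. A standalone parking_lot sketch of the underlying operation, independent of the wrappers here:

    use parking_lot::{RwLock, RwLockWriteGuard};

    fn main() {
        let lock = RwLock::new(0u32);
        let mut write = lock.write();
        *write += 1;
        // Downgrade atomically: no other writer can sneak in between.
        let read = RwLockWriteGuard::downgrade(write);
        println!("{}", *read);
    }
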
+/// +/// [mod]: crate::lock::ranked +pub struct RwLockWriteGuard<'a, T> { + inner: parking_lot::RwLockWriteGuard<'a, T>, + saved: LockStateGuard, +} + +impl RwLock { + pub fn new(rank: LockRank, value: T) -> RwLock { + RwLock { + inner: parking_lot::RwLock::new(value), + rank, + } + } + + #[track_caller] + pub fn read(&self) -> RwLockReadGuard { + let saved = acquire(self.rank, Location::caller()); + RwLockReadGuard { + inner: self.inner.read(), + saved: LockStateGuard(saved), + } + } + + #[track_caller] + pub fn write(&self) -> RwLockWriteGuard { + let saved = acquire(self.rank, Location::caller()); + RwLockWriteGuard { + inner: self.inner.write(), + saved: LockStateGuard(saved), + } + } +} + +impl<'a, T> RwLockWriteGuard<'a, T> { + pub fn downgrade(this: Self) -> RwLockReadGuard<'a, T> { + RwLockReadGuard { + inner: parking_lot::RwLockWriteGuard::downgrade(this.inner), + saved: this.saved, + } + } +} + +impl std::fmt::Debug for RwLock { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.inner.fmt(f) + } +} + +impl<'a, T> std::ops::Deref for RwLockReadGuard<'a, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + self.inner.deref() + } +} + +impl<'a, T> std::ops::Deref for RwLockWriteGuard<'a, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + self.inner.deref() + } +} + +impl<'a, T> std::ops::DerefMut for RwLockWriteGuard<'a, T> { + fn deref_mut(&mut self) -> &mut Self::Target { + self.inner.deref_mut() + } +} + +/// Locks can be acquired in the order indicated by their ranks. +#[test] +fn permitted() { + use super::rank; + + let lock1 = Mutex::new(rank::PAWN, ()); + let lock2 = Mutex::new(rank::ROOK, ()); + + let _guard1 = lock1.lock(); + let _guard2 = lock2.lock(); +} + +/// Locks can only be acquired in the order indicated by their ranks. +#[test] +#[should_panic(expected = "Locking pawn after locking rook")] +fn forbidden_unrelated() { + use super::rank; + + let lock1 = Mutex::new(rank::ROOK, ()); + let lock2 = Mutex::new(rank::PAWN, ()); + + let _guard1 = lock1.lock(); + let _guard2 = lock2.lock(); +} + +/// Lock acquisitions can't skip ranks. +/// +/// These two locks *could* be acquired in this order, but only if other locks +/// are acquired in between them. Skipping ranks isn't allowed. +#[test] +#[should_panic(expected = "Locking knight after locking pawn")] +fn forbidden_skip() { + use super::rank; + + let lock1 = Mutex::new(rank::PAWN, ()); + let lock2 = Mutex::new(rank::KNIGHT, ()); + + let _guard1 = lock1.lock(); + let _guard2 = lock2.lock(); +} + +/// Locks can be acquired and released in a stack-like order. +#[test] +fn stack_like() { + use super::rank; + + let lock1 = Mutex::new(rank::PAWN, ()); + let lock2 = Mutex::new(rank::ROOK, ()); + let lock3 = Mutex::new(rank::BISHOP, ()); + + let guard1 = lock1.lock(); + let guard2 = lock2.lock(); + drop(guard2); + + let guard3 = lock3.lock(); + drop(guard3); + drop(guard1); +} + +/// Locks can only be acquired and released in a stack-like order. +#[test] +#[should_panic(expected = "Lock not released in stacking order")] +fn non_stack_like() { + use super::rank; + + let lock1 = Mutex::new(rank::PAWN, ()); + let lock2 = Mutex::new(rank::ROOK, ()); + + let guard1 = lock1.lock(); + let guard2 = lock2.lock(); + + // Avoid a double panic from dropping this while unwinding due to the panic + // we're testing for. 
+ std::mem::forget(guard2); + + drop(guard1); +} diff --git a/third_party/rust/wgpu-core/src/lock/vanilla.rs b/third_party/rust/wgpu-core/src/lock/vanilla.rs new file mode 100644 index 0000000000..9a35b6d9d8 --- /dev/null +++ b/third_party/rust/wgpu-core/src/lock/vanilla.rs @@ -0,0 +1,121 @@ +//! Plain, uninstrumented wrappers around [`parking_lot`] lock types. +//! +//! These definitions are used when no particular lock instrumentation +//! Cargo feature is selected. + +/// A plain wrapper around [`parking_lot::Mutex`]. +/// +/// This is just like [`parking_lot::Mutex`], except that our [`new`] +/// method takes a rank, indicating where the new mutex should sit in +/// `wgpu-core`'s lock ordering. The rank is ignored. +/// +/// See the [`lock`] module documentation for other wrappers. +/// +/// [`new`]: Mutex::new +/// [`lock`]: crate::lock +pub struct Mutex(parking_lot::Mutex); + +/// A guard produced by locking [`Mutex`]. +/// +/// This is just a wrapper around a [`parking_lot::MutexGuard`]. +pub struct MutexGuard<'a, T>(parking_lot::MutexGuard<'a, T>); + +impl Mutex { + pub fn new(_rank: super::rank::LockRank, value: T) -> Mutex { + Mutex(parking_lot::Mutex::new(value)) + } + + pub fn lock(&self) -> MutexGuard { + MutexGuard(self.0.lock()) + } +} + +impl<'a, T> std::ops::Deref for MutexGuard<'a, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + self.0.deref() + } +} + +impl<'a, T> std::ops::DerefMut for MutexGuard<'a, T> { + fn deref_mut(&mut self) -> &mut Self::Target { + self.0.deref_mut() + } +} + +impl std::fmt::Debug for Mutex { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +/// A plain wrapper around [`parking_lot::RwLock`]. +/// +/// This is just like [`parking_lot::RwLock`], except that our [`new`] +/// method takes a rank, indicating where the new mutex should sit in +/// `wgpu-core`'s lock ordering. The rank is ignored. +/// +/// See the [`lock`] module documentation for other wrappers. +/// +/// [`new`]: RwLock::new +/// [`lock`]: crate::lock +pub struct RwLock(parking_lot::RwLock); + +/// A read guard produced by locking [`RwLock`] as a reader. +/// +/// This is just a wrapper around a [`parking_lot::RwLockReadGuard`]. +pub struct RwLockReadGuard<'a, T>(parking_lot::RwLockReadGuard<'a, T>); + +/// A write guard produced by locking [`RwLock`] as a writer. +/// +/// This is just a wrapper around a [`parking_lot::RwLockWriteGuard`]. 
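
The vanilla wrappers are plain newtypes: they accept a rank only so that call sites compile unchanged against either lock implementation, and they forward `Deref`/`DerefMut` to the inner guard. A standalone sketch of that pattern over `std::sync::Mutex` follows; the names are illustrative, not the wgpu-core types.

    use std::ops::{Deref, DerefMut};

    /// Illustrative rank type; the vanilla wrappers take one and ignore it.
    #[derive(Clone, Copy)]
    struct LockRank;

    /// Same constructor shape as the ranked Mutex, but the rank is discarded.
    struct PlainMutex<T>(std::sync::Mutex<T>);

    struct PlainGuard<'a, T>(std::sync::MutexGuard<'a, T>);

    impl<T> PlainMutex<T> {
        fn new(_rank: LockRank, value: T) -> Self {
            PlainMutex(std::sync::Mutex::new(value))
        }
        fn lock(&self) -> PlainGuard<'_, T> {
            PlainGuard(self.0.lock().unwrap())
        }
    }

    impl<'a, T> Deref for PlainGuard<'a, T> {
        type Target = T;
        fn deref(&self) -> &T {
            &self.0
        }
    }

    impl<'a, T> DerefMut for PlainGuard<'a, T> {
        fn deref_mut(&mut self) -> &mut T {
            &mut self.0
        }
    }

    fn main() {
        let m = PlainMutex::new(LockRank, 41);
        *m.lock() += 1;
        println!("{}", *m.lock());
    }
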
+pub struct RwLockWriteGuard<'a, T>(parking_lot::RwLockWriteGuard<'a, T>); + +impl RwLock { + pub fn new(_rank: super::rank::LockRank, value: T) -> RwLock { + RwLock(parking_lot::RwLock::new(value)) + } + + pub fn read(&self) -> RwLockReadGuard { + RwLockReadGuard(self.0.read()) + } + + pub fn write(&self) -> RwLockWriteGuard { + RwLockWriteGuard(self.0.write()) + } +} + +impl<'a, T> RwLockWriteGuard<'a, T> { + pub fn downgrade(this: Self) -> RwLockReadGuard<'a, T> { + RwLockReadGuard(parking_lot::RwLockWriteGuard::downgrade(this.0)) + } +} + +impl std::fmt::Debug for RwLock { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +impl<'a, T> std::ops::Deref for RwLockReadGuard<'a, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + self.0.deref() + } +} + +impl<'a, T> std::ops::Deref for RwLockWriteGuard<'a, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + self.0.deref() + } +} + +impl<'a, T> std::ops::DerefMut for RwLockWriteGuard<'a, T> { + fn deref_mut(&mut self) -> &mut Self::Target { + self.0.deref_mut() + } +} diff --git a/third_party/rust/wgpu-core/src/pipeline.rs b/third_party/rust/wgpu-core/src/pipeline.rs index 4a7651b327..d70b118d7e 100644 --- a/third_party/rust/wgpu-core/src/pipeline.rs +++ b/third_party/rust/wgpu-core/src/pipeline.rs @@ -10,7 +10,8 @@ use crate::{ resource_log, validation, Label, }; use arrayvec::ArrayVec; -use std::{borrow::Cow, error::Error, fmt, marker::PhantomData, num::NonZeroU32, sync::Arc}; +use naga::error::ShaderError; +use std::{borrow::Cow, marker::PhantomData, num::NonZeroU32, sync::Arc}; use thiserror::Error; /// Information about buffer bindings, which @@ -107,79 +108,8 @@ impl ShaderModule { } } -#[derive(Clone, Debug)] -pub struct ShaderError { - pub source: String, - pub label: Option, - pub inner: Box, -} -#[cfg(feature = "wgsl")] -impl fmt::Display for ShaderError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let label = self.label.as_deref().unwrap_or_default(); - let string = self.inner.emit_to_string(&self.source); - write!(f, "\nShader '{label}' parsing {string}") - } -} -#[cfg(feature = "glsl")] -impl fmt::Display for ShaderError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let label = self.label.as_deref().unwrap_or_default(); - let string = self.inner.emit_to_string(&self.source); - write!(f, "\nShader '{label}' parsing {string}") - } -} -#[cfg(feature = "spirv")] -impl fmt::Display for ShaderError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let label = self.label.as_deref().unwrap_or_default(); - let string = self.inner.emit_to_string(&self.source); - write!(f, "\nShader '{label}' parsing {string}") - } -} -impl fmt::Display for ShaderError> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - use codespan_reporting::{ - diagnostic::{Diagnostic, Label}, - files::SimpleFile, - term, - }; - - let label = self.label.as_deref().unwrap_or_default(); - let files = SimpleFile::new(label, &self.source); - let config = term::Config::default(); - let mut writer = term::termcolor::NoColor::new(Vec::new()); - - let diagnostic = Diagnostic::error().with_labels( - self.inner - .spans() - .map(|&(span, ref desc)| { - Label::primary((), span.to_range().unwrap()).with_message(desc.to_owned()) - }) - .collect(), - ); - - term::emit(&mut writer, &config, &files, &diagnostic).expect("cannot write error"); - - write!( - f, - "\nShader validation {}", - String::from_utf8_lossy(&writer.into_inner()) - ) - } -} 
-impl Error for ShaderError -where - ShaderError: fmt::Display, - E: Error + 'static, -{ - fn source(&self) -> Option<&(dyn Error + 'static)> { - Some(&self.inner) - } -} - //Note: `Clone` would require `WithSpan: Clone`. -#[derive(Debug, Error)] +#[derive(Clone, Debug, Error)] #[non_exhaustive] pub enum CreateShaderModuleError { #[cfg(feature = "wgsl")] @@ -187,7 +117,7 @@ pub enum CreateShaderModuleError { Parsing(#[from] ShaderError), #[cfg(feature = "glsl")] #[error(transparent)] - ParsingGlsl(#[from] ShaderError), + ParsingGlsl(#[from] ShaderError), #[cfg(feature = "spirv")] #[error(transparent)] ParsingSpirV(#[from] ShaderError), @@ -209,17 +139,6 @@ pub enum CreateShaderModuleError { }, } -impl CreateShaderModuleError { - pub fn location(&self, source: &str) -> Option { - match *self { - #[cfg(feature = "wgsl")] - CreateShaderModuleError::Parsing(ref err) => err.inner.location(source), - CreateShaderModuleError::Validation(ref err) => err.inner.location(source), - _ => None, - } - } -} - /// Describes a programmable pipeline stage. #[derive(Clone, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -233,6 +152,19 @@ pub struct ProgrammableStageDescriptor<'a> { /// * If a single entry point associated with this stage must be in the shader, then proceed as /// if `Some(…)` was specified with that entry point's name. pub entry_point: Option>, + /// Specifies the values of pipeline-overridable constants in the shader module. + /// + /// If an `@id` attribute was specified on the declaration, + /// the key must be the pipeline constant ID as a decimal ASCII number; if not, + /// the key must be the constant's identifier name. + /// + /// The value may represent any of WGSL's concrete scalar types. + pub constants: Cow<'a, naga::back::PipelineConstants>, + /// Whether workgroup scoped memory will be initialized with zero values for this stage. + /// + /// This is required by the WebGPU spec, but may have overhead which can be avoided + /// for cross-platform applications + pub zero_initialize_workgroup_memory: bool, } /// Number of implicit bind groups derived at pipeline creation. diff --git a/third_party/rust/wgpu-core/src/pool.rs b/third_party/rust/wgpu-core/src/pool.rs index 47de6d5feb..7d17f3a7a3 100644 --- a/third_party/rust/wgpu-core/src/pool.rs +++ b/third_party/rust/wgpu-core/src/pool.rs @@ -5,8 +5,8 @@ use std::{ }; use once_cell::sync::OnceCell; -use parking_lot::Mutex; +use crate::lock::{rank, Mutex}; use crate::{PreHashedKey, PreHashedMap}; type SlotInner = Weak; @@ -22,13 +22,15 @@ pub struct ResourcePool { impl ResourcePool { pub fn new() -> Self { Self { - inner: Mutex::new(HashMap::default()), + inner: Mutex::new(rank::RESOURCE_POOL_INNER, HashMap::default()), } } - /// Get a resource from the pool with the given entry map, or create a new one if it doesn't exist using the given constructor. + /// Get a resource from the pool with the given entry map, or create a new + /// one if it doesn't exist using the given constructor. /// - /// Behaves such that only one resource will be created for each unique entry map at any one time. + /// Behaves such that only one resource will be created for each unique + /// entry map at any one time. pub fn get_or_init(&self, key: K, constructor: F) -> Result, E> where F: FnOnce(K) -> Result, E>, @@ -96,6 +98,8 @@ impl ResourcePool { /// Remove the given entry map from the pool. /// /// Must *only* be called in the Drop impl of [`BindGroupLayout`]. 
+ /// + /// [`BindGroupLayout`]: crate::binding_model::BindGroupLayout pub fn remove(&self, key: &K) { let hashed_key = PreHashedKey::from_key(key); diff --git a/third_party/rust/wgpu-core/src/present.rs b/third_party/rust/wgpu-core/src/present.rs index cb4e17798f..053f7fdb24 100644 --- a/third_party/rust/wgpu-core/src/present.rs +++ b/third_party/rust/wgpu-core/src/present.rs @@ -9,7 +9,7 @@ When this texture is presented, we remove it from the device tracker as well as extract it from the hub. !*/ -use std::borrow::Borrow; +use std::{borrow::Borrow, sync::Arc}; #[cfg(feature = "trace")] use crate::device::trace::Action; @@ -21,13 +21,13 @@ use crate::{ hal_api::HalApi, hal_label, id, init_tracker::TextureInitTracker, + lock::{rank, Mutex, RwLock}, resource::{self, ResourceInfo}, snatch::Snatchable, track, }; use hal::{Queue as _, Surface as _}; -use parking_lot::{Mutex, RwLock}; use thiserror::Error; use wgt::SurfaceStatus as Status; @@ -157,7 +157,7 @@ impl Global { #[cfg(not(feature = "trace"))] let _ = device; - let suf = A::get_surface(surface.as_ref()); + let suf = A::surface_as_hal(surface.as_ref()); let (texture_id, status) = match unsafe { suf.unwrap() .acquire_texture(Some(std::time::Duration::from_millis( @@ -215,7 +215,10 @@ impl Global { desc: texture_desc, hal_usage, format_features, - initialization_status: RwLock::new(TextureInitTracker::new(1, 1)), + initialization_status: RwLock::new( + rank::TEXTURE_INITIALIZATION_STATUS, + TextureInitTracker::new(1, 1), + ), full_range: track::TextureSelector { layers: 0..1, mips: 0..1, @@ -224,14 +227,17 @@ impl Global { "", Some(device.tracker_indices.textures.clone()), ), - clear_mode: RwLock::new(resource::TextureClearMode::Surface { - clear_view: Some(clear_view), - }), - views: Mutex::new(Vec::new()), - bind_groups: Mutex::new(Vec::new()), + clear_mode: RwLock::new( + rank::TEXTURE_CLEAR_MODE, + resource::TextureClearMode::Surface { + clear_view: Some(clear_view), + }, + ), + views: Mutex::new(rank::TEXTURE_VIEWS, Vec::new()), + bind_groups: Mutex::new(rank::TEXTURE_BIND_GROUPS, Vec::new()), }; - let (id, resource) = fid.assign(texture); + let (id, resource) = fid.assign(Arc::new(texture)); log::debug!("Created CURRENT Surface Texture {:?}", id); { @@ -324,7 +330,7 @@ impl Global { .textures .remove(texture.info.tracker_index()); let mut exclusive_snatch_guard = device.snatchable_lock.write(); - let suf = A::get_surface(&surface); + let suf = A::surface_as_hal(&surface); let mut inner = texture.inner_mut(&mut exclusive_snatch_guard); let inner = inner.as_mut().unwrap(); @@ -418,7 +424,7 @@ impl Global { .lock() .textures .remove(texture.info.tracker_index()); - let suf = A::get_surface(&surface); + let suf = A::surface_as_hal(&surface); let exclusive_snatch_guard = device.snatchable_lock.write(); match texture.inner.snatch(exclusive_snatch_guard).unwrap() { resource::TextureInner::Surface { mut raw, parent_id } => { diff --git a/third_party/rust/wgpu-core/src/registry.rs b/third_party/rust/wgpu-core/src/registry.rs index 80394351af..f0f5674dae 100644 --- a/third_party/rust/wgpu-core/src/registry.rs +++ b/third_party/rust/wgpu-core/src/registry.rs @@ -1,11 +1,11 @@ use std::sync::Arc; -use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard}; use wgt::Backend; use crate::{ id::Id, identity::IdentityManager, + lock::{rank, RwLock, RwLockReadGuard, RwLockWriteGuard}, resource::Resource, storage::{Element, InvalidId, Storage}, }; @@ -37,7 +37,8 @@ impl RegistryReport { /// any other dependent resource /// #[derive(Debug)] -pub 
struct Registry { +pub(crate) struct Registry { + // Must only contain an id which has either never been used or has been released from `storage` identity: Arc>, storage: RwLock>, backend: Backend, @@ -47,7 +48,7 @@ impl Registry { pub(crate) fn new(backend: Backend) -> Self { Self { identity: Arc::new(IdentityManager::new()), - storage: RwLock::new(Storage::new()), + storage: RwLock::new(rank::REGISTRY_STORAGE, Storage::new()), backend, } } @@ -78,21 +79,26 @@ impl FutureId<'_, T> { Arc::new(value) } + pub fn init_in_place(&self, mut value: Arc) -> Arc { + Arc::get_mut(&mut value) + .unwrap() + .as_info_mut() + .set_id(self.id); + value + } + /// Assign a new resource to this ID. /// /// Registers it with the registry, and fills out the resource info. - pub fn assign(self, value: T) -> (Id, Arc) { + pub fn assign(self, value: Arc) -> (Id, Arc) { let mut data = self.data.write(); - data.insert(self.id, self.init(value)); + data.insert(self.id, self.init_in_place(value)); (self.id, data.get(self.id).unwrap().clone()) } /// Assign an existing resource to a new ID. /// /// Registers it with the registry. - /// - /// This _will_ leak the ID, and it will not be recycled again. - /// See https://github.com/gfx-rs/wgpu/issues/4912. pub fn assign_existing(self, value: &Arc) -> Id { let mut data = self.data.write(); debug_assert!(!data.contains(self.id)); @@ -138,28 +144,35 @@ impl Registry { pub(crate) fn write<'a>(&'a self) -> RwLockWriteGuard<'a, Storage> { self.storage.write() } - pub fn unregister_locked(&self, id: Id, storage: &mut Storage) -> Option> { + pub(crate) fn unregister_locked( + &self, + id: Id, + storage: &mut Storage, + ) -> Option> { self.identity.free(id); storage.remove(id) } - pub fn force_replace(&self, id: Id, mut value: T) { + pub(crate) fn force_replace(&self, id: Id, mut value: T) { let mut storage = self.storage.write(); value.as_info_mut().set_id(id); storage.force_replace(id, value) } - pub fn force_replace_with_error(&self, id: Id, label: &str) { + pub(crate) fn force_replace_with_error(&self, id: Id, label: &str) { let mut storage = self.storage.write(); storage.remove(id); storage.insert_error(id, label); } pub(crate) fn unregister(&self, id: Id) -> Option> { - self.identity.free(id); let value = self.storage.write().remove(id); + // This needs to happen *after* removing it from the storage, to maintain the + // invariant that `self.identity` only contains ids which are actually available + // See https://github.com/gfx-rs/wgpu/issues/5372 + self.identity.free(id); //Returning None is legal if it's an error ID value } - pub fn label_for_resource(&self, id: Id) -> String { + pub(crate) fn label_for_resource(&self, id: Id) -> String { let guard = self.storage.read(); let type_name = guard.kind(); @@ -197,3 +210,53 @@ impl Registry { report } } + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use crate::{ + id::Marker, + resource::{Resource, ResourceInfo, ResourceType}, + }; + + use super::Registry; + struct TestData { + info: ResourceInfo, + } + struct TestDataId; + impl Marker for TestDataId {} + + impl Resource for TestData { + type Marker = TestDataId; + + const TYPE: ResourceType = "Test data"; + + fn as_info(&self) -> &ResourceInfo { + &self.info + } + + fn as_info_mut(&mut self) -> &mut ResourceInfo { + &mut self.info + } + } + + #[test] + fn simultaneous_registration() { + let registry = Registry::without_backend(); + std::thread::scope(|s| { + for _ in 0..5 { + s.spawn(|| { + for _ in 0..1000 { + let value = Arc::new(TestData { + info: 
ResourceInfo::new("Test data", None), + }); + let new_id = registry.prepare(None); + let (id, _) = new_id.assign(value); + registry.unregister(id); + } + }); + } + }) + } +} diff --git a/third_party/rust/wgpu-core/src/resource.rs b/third_party/rust/wgpu-core/src/resource.rs index aca077caab..d3cd2968bf 100644 --- a/third_party/rust/wgpu-core/src/resource.rs +++ b/third_party/rust/wgpu-core/src/resource.rs @@ -8,8 +8,12 @@ use crate::{ }, global::Global, hal_api::HalApi, - id::{AdapterId, BufferId, DeviceId, Id, Marker, SurfaceId, TextureId}, + id::{ + AdapterId, BufferId, CommandEncoderId, DeviceId, Id, Marker, SurfaceId, TextureId, + TextureViewId, + }, init_tracker::{BufferInitTracker, TextureInitTracker}, + lock::{Mutex, RwLock}, resource, resource_log, snatch::{ExclusiveSnatchGuard, SnatchGuard, Snatchable}, track::{SharedTrackerIndexAllocator, TextureSelector, TrackerIndex}, @@ -18,7 +22,6 @@ use crate::{ }; use hal::CommandEncoder; -use parking_lot::{Mutex, RwLock}; use smallvec::SmallVec; use thiserror::Error; use wgt::WasmNotSendSync; @@ -55,7 +58,7 @@ use std::{ /// [`Device`]: crate::device::resource::Device /// [`Buffer`]: crate::resource::Buffer #[derive(Debug)] -pub struct ResourceInfo { +pub(crate) struct ResourceInfo { id: Option>, tracker_index: TrackerIndex, tracker_indices: Option>, @@ -141,7 +144,7 @@ impl ResourceInfo { pub(crate) type ResourceType = &'static str; -pub trait Resource: 'static + Sized + WasmNotSendSync { +pub(crate) trait Resource: 'static + Sized + WasmNotSendSync { type Marker: Marker; const TYPE: ResourceType; fn as_info(&self) -> &ResourceInfo; @@ -370,10 +373,10 @@ pub type BufferAccessResult = Result<(), BufferAccessError>; #[derive(Debug)] pub(crate) struct BufferPendingMapping { - pub range: Range, - pub op: BufferMapOperation, + pub(crate) range: Range, + pub(crate) op: BufferMapOperation, // hold the parent alive while the mapping is active - pub _parent_buffer: Arc>, + pub(crate) _parent_buffer: Arc>, } pub type BufferDescriptor<'a> = wgt::BufferDescriptor>; @@ -734,7 +737,7 @@ pub(crate) enum TextureInner { } impl TextureInner { - pub fn raw(&self) -> Option<&A::Texture> { + pub(crate) fn raw(&self) -> Option<&A::Texture> { match self { Self::Native { raw } => Some(raw), Self::Surface { raw: Some(tex), .. 
} => Some(tex.borrow()), @@ -924,11 +927,11 @@ impl Global { /// # Safety /// /// - The raw texture handle must not be manually destroyed - pub unsafe fn texture_as_hal)>( + pub unsafe fn texture_as_hal) -> R, R>( &self, id: TextureId, hal_texture_callback: F, - ) { + ) -> R { profiling::scope!("Texture::as_hal"); let hub = A::hub(self); @@ -937,7 +940,26 @@ impl Global { let snatch_guard = texture.device.snatchable_lock.read(); let hal_texture = texture.raw(&snatch_guard); - hal_texture_callback(hal_texture); + hal_texture_callback(hal_texture) + } + + /// # Safety + /// + /// - The raw texture view handle must not be manually destroyed + pub unsafe fn texture_view_as_hal) -> R, R>( + &self, + id: TextureViewId, + hal_texture_view_callback: F, + ) -> R { + profiling::scope!("TextureView::as_hal"); + + let hub = A::hub(self); + let texture_view_opt = { hub.texture_views.try_get(id).ok().flatten() }; + let texture_view = texture_view_opt.as_ref().unwrap(); + let snatch_guard = texture_view.device.snatchable_lock.read(); + let hal_texture_view = texture_view.raw(&snatch_guard); + + hal_texture_view_callback(hal_texture_view) } /// # Safety @@ -1001,10 +1023,38 @@ impl Global { profiling::scope!("Surface::as_hal"); let surface = self.surfaces.get(id).ok(); - let hal_surface = surface.as_ref().and_then(|surface| A::get_surface(surface)); + let hal_surface = surface + .as_ref() + .and_then(|surface| A::surface_as_hal(surface)); hal_surface_callback(hal_surface) } + + /// # Safety + /// + /// - The raw command encoder handle must not be manually destroyed + pub unsafe fn command_encoder_as_hal_mut< + A: HalApi, + F: FnOnce(Option<&mut A::CommandEncoder>) -> R, + R, + >( + &self, + id: CommandEncoderId, + hal_command_encoder_callback: F, + ) -> R { + profiling::scope!("CommandEncoder::as_hal"); + + let hub = A::hub(self); + let cmd_buf = hub + .command_buffers + .get(id.into_command_buffer_id()) + .unwrap(); + let mut cmd_buf_data = cmd_buf.data.lock(); + let cmd_buf_data = cmd_buf_data.as_mut().unwrap(); + let cmd_buf_raw = cmd_buf_data.encoder.open().ok(); + + hal_command_encoder_callback(cmd_buf_raw) + } } /// A texture that has been marked as destroyed and is staged for actual deletion soon. diff --git a/third_party/rust/wgpu-core/src/snatch.rs b/third_party/rust/wgpu-core/src/snatch.rs index d5cd1a3d37..08a1eba11d 100644 --- a/third_party/rust/wgpu-core/src/snatch.rs +++ b/third_party/rust/wgpu-core/src/snatch.rs @@ -1,6 +1,6 @@ #![allow(unused)] -use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard}; +use crate::lock::{RwLock, RwLockReadGuard, RwLockWriteGuard}; use std::{ backtrace::Backtrace, cell::{Cell, RefCell, UnsafeCell}, @@ -8,6 +8,8 @@ use std::{ thread, }; +use crate::lock::rank; + /// A guard that provides read access to snatchable data. pub struct SnatchGuard<'a>(RwLockReadGuard<'a, ()>); /// A guard that allows snatching the snatchable data. 
@@ -64,8 +66,58 @@ impl std::fmt::Debug for Snatchable { unsafe impl Sync for Snatchable {} +struct LockTrace { + purpose: &'static str, + caller: &'static Location<'static>, + backtrace: Backtrace, +} + +impl std::fmt::Display for LockTrace { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "a {} lock at {}\n{}", + self.purpose, self.caller, self.backtrace + ) + } +} + +#[cfg(debug_assertions)] +impl LockTrace { + #[track_caller] + fn enter(purpose: &'static str) { + let new = LockTrace { + purpose, + caller: Location::caller(), + backtrace: Backtrace::capture(), + }; + + if let Some(prev) = SNATCH_LOCK_TRACE.take() { + let current = thread::current(); + let name = current.name().unwrap_or(""); + panic!( + "thread '{name}' attempted to acquire a snatch lock recursively.\n\ + - Currently trying to acquire {new}\n\ + - Previously acquired {prev}", + ); + } else { + SNATCH_LOCK_TRACE.set(Some(new)); + } + } + + fn exit() { + SNATCH_LOCK_TRACE.take(); + } +} + +#[cfg(not(debug_assertions))] +impl LockTrace { + fn enter(purpose: &'static str) {} + fn exit() {} +} + thread_local! { - static READ_LOCK_LOCATION: Cell, Backtrace)>> = const { Cell::new(None) }; + static SNATCH_LOCK_TRACE: Cell> = const { Cell::new(None) }; } /// A Device-global lock for all snatchable data. @@ -78,31 +130,16 @@ impl SnatchLock { /// right SnatchLock (the one associated to the same device). This method is unsafe /// to force force sers to think twice about creating a SnatchLock. The only place this /// method should be called is when creating the device. - pub unsafe fn new() -> Self { + pub unsafe fn new(rank: rank::LockRank) -> Self { SnatchLock { - lock: RwLock::new(()), + lock: RwLock::new(rank, ()), } } /// Request read access to snatchable resources. #[track_caller] pub fn read(&self) -> SnatchGuard { - if cfg!(debug_assertions) { - let caller = Location::caller(); - let backtrace = Backtrace::capture(); - if let Some((prev, bt)) = READ_LOCK_LOCATION.take() { - let current = thread::current(); - let name = current.name().unwrap_or(""); - panic!( - "thread '{name}' attempted to acquire a snatch read lock recursively.\n - - {prev}\n{bt}\n - - {caller}\n{backtrace}" - ); - } else { - READ_LOCK_LOCATION.set(Some((caller, backtrace))); - } - } - + LockTrace::enter("read"); SnatchGuard(self.lock.read()) } @@ -111,14 +148,21 @@ impl SnatchLock { /// This should only be called when a resource needs to be snatched. This has /// a high risk of causing lock contention if called concurrently with other /// wgpu work. + #[track_caller] pub fn write(&self) -> ExclusiveSnatchGuard { + LockTrace::enter("write"); ExclusiveSnatchGuard(self.lock.write()) } } impl Drop for SnatchGuard<'_> { fn drop(&mut self) { - #[cfg(debug_assertions)] - READ_LOCK_LOCATION.take(); + LockTrace::exit(); + } +} + +impl Drop for ExclusiveSnatchGuard<'_> { + fn drop(&mut self) { + LockTrace::exit(); } } diff --git a/third_party/rust/wgpu-core/src/storage.rs b/third_party/rust/wgpu-core/src/storage.rs index 554ced8b7d..03874b8104 100644 --- a/third_party/rust/wgpu-core/src/storage.rs +++ b/third_party/rust/wgpu-core/src/storage.rs @@ -29,11 +29,14 @@ pub(crate) struct InvalidId; /// A table of `T` values indexed by the id type `I`. /// +/// `Storage` implements [`std::ops::Index`], accepting `Id` values as +/// indices. 
+/// /// The table is represented as a vector indexed by the ids' index /// values, so you should use an id allocator like `IdentityManager` /// that keeps the index values dense and close to zero. #[derive(Debug)] -pub struct Storage +pub(crate) struct Storage where T: Resource, { diff --git a/third_party/rust/wgpu-core/src/track/buffer.rs b/third_party/rust/wgpu-core/src/track/buffer.rs index 6cf1fdda6f..9a52a53253 100644 --- a/third_party/rust/wgpu-core/src/track/buffer.rs +++ b/third_party/rust/wgpu-core/src/track/buffer.rs @@ -11,6 +11,7 @@ use super::{PendingTransition, ResourceTracker, TrackerIndex}; use crate::{ hal_api::HalApi, id::BufferId, + lock::{rank, Mutex}, resource::{Buffer, Resource}, snatch::SnatchGuard, storage::Storage, @@ -20,7 +21,6 @@ use crate::{ }, }; use hal::{BufferBarrier, BufferUses}; -use parking_lot::Mutex; use wgt::{strict_assert, strict_assert_eq}; impl ResourceUses for BufferUses { @@ -51,7 +51,7 @@ pub(crate) struct BufferBindGroupState { impl BufferBindGroupState { pub fn new() -> Self { Self { - buffers: Mutex::new(Vec::new()), + buffers: Mutex::new(rank::BUFFER_BIND_GROUP_STATE_BUFFERS, Vec::new()), _phantom: PhantomData, } @@ -245,6 +245,22 @@ impl BufferUsageScope { .get(id) .map_err(|_| UsageConflict::BufferInvalid { id })?; + self.insert_merge_single(buffer.clone(), new_state) + .map(|_| buffer) + } + + /// Merge a single state into the UsageScope, using an already resolved buffer. + /// + /// If the resulting state is invalid, returns a usage + /// conflict with the details of the invalid state. + /// + /// If the ID is higher than the length of internal vectors, + /// the vectors will be extended. A call to set_size is not needed. + pub fn insert_merge_single( + &mut self, + buffer: Arc>, + new_state: BufferUses, + ) -> Result<(), UsageConflict> { let index = buffer.info.tracker_index().as_usize(); self.allow_index(index); @@ -260,12 +276,12 @@ impl BufferUsageScope { index, BufferStateProvider::Direct { state: new_state }, ResourceMetadataProvider::Direct { - resource: Cow::Owned(buffer.clone()), + resource: Cow::Owned(buffer), }, )?; } - Ok(buffer) + Ok(()) } } diff --git a/third_party/rust/wgpu-core/src/track/metadata.rs b/third_party/rust/wgpu-core/src/track/metadata.rs index 3e71e0e084..d6e8d6f906 100644 --- a/third_party/rust/wgpu-core/src/track/metadata.rs +++ b/third_party/rust/wgpu-core/src/track/metadata.rs @@ -87,16 +87,18 @@ impl ResourceMetadata { /// Add the resource with the given index, epoch, and reference count to the /// set. /// + /// Returns a reference to the newly inserted resource. + /// (This allows avoiding a clone/reference count increase in many cases.) + /// /// # Safety /// /// The given `index` must be in bounds for this `ResourceMetadata`'s /// existing tables. See `tracker_assert_in_bounds`. #[inline(always)] - pub(super) unsafe fn insert(&mut self, index: usize, resource: Arc) { + pub(super) unsafe fn insert(&mut self, index: usize, resource: Arc) -> &Arc { self.owned.set(index, true); - unsafe { - *self.resources.get_unchecked_mut(index) = Some(resource); - } + let resource_dst = unsafe { self.resources.get_unchecked_mut(index) }; + resource_dst.insert(resource) } /// Get the resource with the given index. 
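The `ResourceMetadata::insert` change above now hands back a reference to the `Arc` it just stored, so callers can keep using that borrow instead of cloning the `Arc` (and bumping its reference count) a second time. The following standalone sketch illustrates only that pattern; the `Slots` container is hypothetical and stands in for wgpu-core's real metadata tables.

use std::sync::Arc;

/// Hypothetical slot table; only the insert-and-borrow pattern is shown.
struct Slots<T> {
    slots: Vec<Option<Arc<T>>>,
}

impl<T> Slots<T> {
    fn new() -> Self {
        Self { slots: Vec::new() }
    }

    /// Store `value` at `index` and return a borrow of the stored `Arc`.
    /// `Option::insert` returns a reference to the slot's new contents, so
    /// no extra `Arc::clone` (reference-count increment) is needed to give
    /// the caller a handle to what was just inserted.
    fn insert(&mut self, index: usize, value: Arc<T>) -> &Arc<T> {
        if self.slots.len() <= index {
            self.slots.resize_with(index + 1, || None);
        }
        self.slots[index].insert(value)
    }
}

fn main() {
    let mut slots = Slots::new();
    let stored: &Arc<String> = slots.insert(3, Arc::new("buffer".to_string()));
    assert_eq!(Arc::strong_count(stored), 1); // no hidden clone happened
    println!("{stored}");
}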
diff --git a/third_party/rust/wgpu-core/src/track/mod.rs b/third_party/rust/wgpu-core/src/track/mod.rs index 374dfe7493..cc20b2a01c 100644 --- a/third_party/rust/wgpu-core/src/track/mod.rs +++ b/third_party/rust/wgpu-core/src/track/mod.rs @@ -102,10 +102,14 @@ mod stateless; mod texture; use crate::{ - binding_model, command, conv, hal_api::HalApi, id, pipeline, resource, snatch::SnatchGuard, + binding_model, command, conv, + hal_api::HalApi, + id, + lock::{rank, Mutex, RwLock}, + pipeline, resource, + snatch::SnatchGuard, }; -use parking_lot::{Mutex, RwLock}; use std::{fmt, ops, sync::Arc}; use thiserror::Error; @@ -136,7 +140,8 @@ impl TrackerIndex { /// of a certain type. This index is separate from the resource ID for various reasons: /// - There can be multiple resource IDs pointing the the same resource. /// - IDs of dead handles can be recycled while resources are internally held alive (and tracked). -/// - The plan is to remove IDs in the long run (https://github.com/gfx-rs/wgpu/issues/5121). +/// - The plan is to remove IDs in the long run +/// ([#5121](https://github.com/gfx-rs/wgpu/issues/5121)). /// In order to produce these tracker indices, there is a shared TrackerIndexAllocator /// per resource type. Indices have the same lifetime as the internal resource they /// are associated to (alloc happens when creating the resource and free is called when @@ -190,7 +195,10 @@ pub(crate) struct SharedTrackerIndexAllocator { impl SharedTrackerIndexAllocator { pub fn new() -> Self { SharedTrackerIndexAllocator { - inner: Mutex::new(TrackerIndexAllocator::new()), + inner: Mutex::new( + rank::SHARED_TRACKER_INDEX_ALLOCATOR_INNER, + TrackerIndexAllocator::new(), + ), } } @@ -480,11 +488,26 @@ impl RenderBundleScope { /// Create the render bundle scope and pull the maximum IDs from the hubs. pub fn new() -> Self { Self { - buffers: RwLock::new(BufferUsageScope::default()), - textures: RwLock::new(TextureUsageScope::default()), - bind_groups: RwLock::new(StatelessTracker::new()), - render_pipelines: RwLock::new(StatelessTracker::new()), - query_sets: RwLock::new(StatelessTracker::new()), + buffers: RwLock::new( + rank::RENDER_BUNDLE_SCOPE_BUFFERS, + BufferUsageScope::default(), + ), + textures: RwLock::new( + rank::RENDER_BUNDLE_SCOPE_TEXTURES, + TextureUsageScope::default(), + ), + bind_groups: RwLock::new( + rank::RENDER_BUNDLE_SCOPE_BIND_GROUPS, + StatelessTracker::new(), + ), + render_pipelines: RwLock::new( + rank::RENDER_BUNDLE_SCOPE_RENDER_PIPELINES, + StatelessTracker::new(), + ), + query_sets: RwLock::new( + rank::RENDER_BUNDLE_SCOPE_QUERY_SETS, + StatelessTracker::new(), + ), } } @@ -639,8 +662,8 @@ impl Tracker { /// /// If a transition is needed to get the resources into the needed /// state, those transitions are stored within the tracker. A - /// subsequent call to [`BufferTracker::drain`] or - /// [`TextureTracker::drain`] is needed to get those transitions. + /// subsequent call to [`BufferTracker::drain_transitions`] or + /// [`TextureTracker::drain_transitions`] is needed to get those transitions. 
/// /// This is a really funky method used by Compute Passes to generate /// barriers after a call to dispatch without needing to iterate diff --git a/third_party/rust/wgpu-core/src/track/stateless.rs b/third_party/rust/wgpu-core/src/track/stateless.rs index 00225f2305..25ffc027ee 100644 --- a/third_party/rust/wgpu-core/src/track/stateless.rs +++ b/third_party/rust/wgpu-core/src/track/stateless.rs @@ -6,9 +6,14 @@ use std::sync::Arc; -use parking_lot::Mutex; - -use crate::{id::Id, resource::Resource, resource_log, storage::Storage, track::ResourceMetadata}; +use crate::{ + id::Id, + lock::{rank, Mutex}, + resource::Resource, + resource_log, + storage::Storage, + track::ResourceMetadata, +}; use super::{ResourceTracker, TrackerIndex}; @@ -24,7 +29,7 @@ pub(crate) struct StatelessBindGroupSate { impl StatelessBindGroupSate { pub fn new() -> Self { Self { - resources: Mutex::new(Vec::new()), + resources: Mutex::new(rank::STATELESS_BIND_GROUP_STATE_RESOURCES, Vec::new()), } } @@ -153,16 +158,17 @@ impl StatelessTracker { /// /// If the ID is higher than the length of internal vectors, /// the vectors will be extended. A call to set_size is not needed. - pub fn insert_single(&mut self, resource: Arc) { + /// + /// Returns a reference to the newly inserted resource. + /// (This allows avoiding a clone/reference count increase in many cases.) + pub fn insert_single(&mut self, resource: Arc) -> &Arc { let index = resource.as_info().tracker_index().as_usize(); self.allow_index(index); self.tracker_assert_in_bounds(index); - unsafe { - self.metadata.insert(index, resource); - } + unsafe { self.metadata.insert(index, resource) } } /// Adds the given resource to the tracker. diff --git a/third_party/rust/wgpu-core/src/track/texture.rs b/third_party/rust/wgpu-core/src/track/texture.rs index 3cf95ff38a..51ed72a18d 100644 --- a/third_party/rust/wgpu-core/src/track/texture.rs +++ b/third_party/rust/wgpu-core/src/track/texture.rs @@ -24,6 +24,7 @@ use super::{ }; use crate::{ hal_api::HalApi, + lock::{rank, Mutex}, resource::{Resource, Texture, TextureInner}, snatch::SnatchGuard, track::{ @@ -36,7 +37,6 @@ use hal::TextureUses; use arrayvec::ArrayVec; use naga::FastHashMap; -use parking_lot::Mutex; use wgt::{strict_assert, strict_assert_eq}; use std::{borrow::Cow, iter, marker::PhantomData, ops::Range, sync::Arc, vec::Drain}; @@ -164,7 +164,7 @@ pub(crate) struct TextureBindGroupState { impl TextureBindGroupState { pub fn new() -> Self { Self { - textures: Mutex::new(Vec::new()), + textures: Mutex::new(rank::TEXTURE_BIND_GROUP_STATE_TEXTURES, Vec::new()), } } diff --git a/third_party/rust/wgpu-core/src/validation.rs b/third_party/rust/wgpu-core/src/validation.rs index e4846c4000..d360ee9621 100644 --- a/third_party/rust/wgpu-core/src/validation.rs +++ b/third_party/rust/wgpu-core/src/validation.rs @@ -655,7 +655,8 @@ impl NumericType { | Vf::Unorm16x4 | Vf::Snorm16x4 | Vf::Float16x4 - | Vf::Float32x4 => (NumericDimension::Vector(Vs::Quad), Scalar::F32), + | Vf::Float32x4 + | Vf::Unorm10_10_10_2 => (NumericDimension::Vector(Vs::Quad), Scalar::F32), Vf::Float64 => (NumericDimension::Scalar, Scalar::F64), Vf::Float64x2 => (NumericDimension::Vector(Vs::Bi), Scalar::F64), Vf::Float64x3 => (NumericDimension::Vector(Vs::Tri), Scalar::F64), -- cgit v1.2.3