|  |  |  |
|---|---|---|
| author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
| committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
| commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
| tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/wgpu-core/src/device/queue.rs | |
| parent | Initial commit. (diff) | |
| download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz, firefox-26a029d407be480d791972afb5975cf62c9360a6.zip | |
Adding upstream version 124.0.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/wgpu-core/src/device/queue.rs')
-rw-r--r-- | third_party/rust/wgpu-core/src/device/queue.rs | 1569 |
1 file changed, 1569 insertions, 0 deletions
diff --git a/third_party/rust/wgpu-core/src/device/queue.rs b/third_party/rust/wgpu-core/src/device/queue.rs new file mode 100644 index 0000000000..08c5b767b6 --- /dev/null +++ b/third_party/rust/wgpu-core/src/device/queue.rs @@ -0,0 +1,1569 @@ +#[cfg(feature = "trace")] +use crate::device::trace::Action; +use crate::{ + api_log, + command::{ + extract_texture_selector, validate_linear_texture_data, validate_texture_copy_range, + ClearError, CommandBuffer, CopySide, ImageCopyTexture, TransferError, + }, + conv, + device::{life::ResourceMaps, DeviceError, WaitIdleError}, + get_lowest_common_denom, + global::Global, + hal_api::HalApi, + hal_label, + id::{self, QueueId}, + init_tracker::{has_copy_partial_init_tracker_coverage, TextureInitRange}, + resource::{ + Buffer, BufferAccessError, BufferMapState, DestroyedBuffer, DestroyedTexture, Resource, + ResourceInfo, ResourceType, StagingBuffer, Texture, TextureInner, + }, + resource_log, track, FastHashMap, SubmissionIndex, +}; + +use hal::{CommandEncoder as _, Device as _, Queue as _}; +use parking_lot::Mutex; +use smallvec::SmallVec; + +use std::{ + iter, mem, ptr, + sync::{atomic::Ordering, Arc}, +}; +use thiserror::Error; + +use super::Device; + +pub struct Queue<A: HalApi> { + pub device: Option<Arc<Device<A>>>, + pub raw: Option<A::Queue>, + pub info: ResourceInfo<Queue<A>>, +} + +impl<A: HalApi> Resource for Queue<A> { + const TYPE: ResourceType = "Queue"; + + type Marker = crate::id::markers::Queue; + + fn as_info(&self) -> &ResourceInfo<Self> { + &self.info + } + + fn as_info_mut(&mut self) -> &mut ResourceInfo<Self> { + &mut self.info + } +} + +impl<A: HalApi> Drop for Queue<A> { + fn drop(&mut self) { + let queue = self.raw.take().unwrap(); + self.device.as_ref().unwrap().release_queue(queue); + } +} + +/// Number of command buffers that we generate from the same pool +/// for the write_xxx commands, before the pool is recycled. +/// +/// If we don't stop at some point, the pool will grow forever, +/// without a concrete moment of when it can be cleared. +const WRITE_COMMAND_BUFFERS_PER_POOL: usize = 64; + +#[repr(C)] +pub struct SubmittedWorkDoneClosureC { + pub callback: unsafe extern "C" fn(user_data: *mut u8), + pub user_data: *mut u8, +} + +#[cfg(send_sync)] +unsafe impl Send for SubmittedWorkDoneClosureC {} + +pub struct SubmittedWorkDoneClosure { + // We wrap this so creating the enum in the C variant can be unsafe, + // allowing our call function to be safe. + inner: SubmittedWorkDoneClosureInner, +} + +#[cfg(send_sync)] +type SubmittedWorkDoneCallback = Box<dyn FnOnce() + Send + 'static>; +#[cfg(not(send_sync))] +type SubmittedWorkDoneCallback = Box<dyn FnOnce() + 'static>; + +enum SubmittedWorkDoneClosureInner { + Rust { callback: SubmittedWorkDoneCallback }, + C { inner: SubmittedWorkDoneClosureC }, +} + +impl SubmittedWorkDoneClosure { + pub fn from_rust(callback: SubmittedWorkDoneCallback) -> Self { + Self { + inner: SubmittedWorkDoneClosureInner::Rust { callback }, + } + } + + /// # Safety + /// + /// - The callback pointer must be valid to call with the provided `user_data` + /// pointer. + /// + /// - Both pointers must point to `'static` data, as the callback may happen at + /// an unspecified time. 
+ pub unsafe fn from_c(inner: SubmittedWorkDoneClosureC) -> Self { + Self { + inner: SubmittedWorkDoneClosureInner::C { inner }, + } + } + + pub(crate) fn call(self) { + match self.inner { + SubmittedWorkDoneClosureInner::Rust { callback } => callback(), + // SAFETY: the contract of the call to from_c says that this unsafe is sound. + SubmittedWorkDoneClosureInner::C { inner } => unsafe { + (inner.callback)(inner.user_data) + }, + } + } +} + +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct WrappedSubmissionIndex { + pub queue_id: QueueId, + pub index: SubmissionIndex, +} + +/// A texture or buffer to be freed soon. +/// +/// This is just a tagged raw texture or buffer, generally about to be added to +/// some other more specific container like: +/// +/// - `PendingWrites::temp_resources`: resources used by queue writes and +/// unmaps, waiting to be folded in with the next queue submission +/// +/// - `ActiveSubmission::last_resources`: temporary resources used by a queue +/// submission, to be freed when it completes +/// +/// - `LifetimeTracker::free_resources`: resources to be freed in the next +/// `maintain` call, no longer used anywhere +#[derive(Debug)] +pub enum TempResource<A: HalApi> { + Buffer(Arc<Buffer<A>>), + StagingBuffer(Arc<StagingBuffer<A>>), + DestroyedBuffer(Arc<DestroyedBuffer<A>>), + DestroyedTexture(Arc<DestroyedTexture<A>>), + Texture(Arc<Texture<A>>), +} + +/// A queue execution for a particular command encoder. +pub(crate) struct EncoderInFlight<A: HalApi> { + raw: A::CommandEncoder, + cmd_buffers: Vec<A::CommandBuffer>, +} + +impl<A: HalApi> EncoderInFlight<A> { + pub(crate) unsafe fn land(mut self) -> A::CommandEncoder { + unsafe { self.raw.reset_all(self.cmd_buffers.into_iter()) }; + self.raw + } +} + +/// A private command encoder for writes made directly on the device +/// or queue. +/// +/// Operations like `buffer_unmap`, `queue_write_buffer`, and +/// `queue_write_texture` need to copy data to the GPU. At the hal +/// level, this must be done by encoding and submitting commands, but +/// these operations are not associated with any specific wgpu command +/// buffer. +/// +/// Instead, `Device::pending_writes` owns one of these values, which +/// has its own hal command encoder and resource lists. The commands +/// accumulated here are automatically submitted to the queue the next +/// time the user submits a wgpu command buffer, ahead of the user's +/// commands. +/// +/// Important: +/// When locking pending_writes be sure that tracker is not locked +/// and try to lock trackers for the minimum timespan possible +/// +/// All uses of [`StagingBuffer`]s end up here. 
+#[derive(Debug)] +pub(crate) struct PendingWrites<A: HalApi> { + pub command_encoder: A::CommandEncoder, + pub is_active: bool, + pub temp_resources: Vec<TempResource<A>>, + pub dst_buffers: FastHashMap<id::BufferId, Arc<Buffer<A>>>, + pub dst_textures: FastHashMap<id::TextureId, Arc<Texture<A>>>, + pub executing_command_buffers: Vec<A::CommandBuffer>, +} + +impl<A: HalApi> PendingWrites<A> { + pub fn new(command_encoder: A::CommandEncoder) -> Self { + Self { + command_encoder, + is_active: false, + temp_resources: Vec::new(), + dst_buffers: FastHashMap::default(), + dst_textures: FastHashMap::default(), + executing_command_buffers: Vec::new(), + } + } + + pub fn dispose(mut self, device: &A::Device) { + unsafe { + if self.is_active { + self.command_encoder.discard_encoding(); + } + self.command_encoder + .reset_all(self.executing_command_buffers.into_iter()); + device.destroy_command_encoder(self.command_encoder); + } + + self.temp_resources.clear(); + } + + pub fn consume_temp(&mut self, resource: TempResource<A>) { + self.temp_resources.push(resource); + } + + fn consume(&mut self, buffer: Arc<StagingBuffer<A>>) { + self.temp_resources + .push(TempResource::StagingBuffer(buffer)); + } + + fn pre_submit(&mut self) -> Result<Option<&A::CommandBuffer>, DeviceError> { + self.dst_buffers.clear(); + self.dst_textures.clear(); + if self.is_active { + let cmd_buf = unsafe { self.command_encoder.end_encoding()? }; + self.is_active = false; + self.executing_command_buffers.push(cmd_buf); + + return Ok(self.executing_command_buffers.last()); + } + + Ok(None) + } + + #[must_use] + fn post_submit( + &mut self, + command_allocator: &mut super::CommandAllocator<A>, + device: &A::Device, + queue: &A::Queue, + ) -> Option<EncoderInFlight<A>> { + if self.executing_command_buffers.len() >= WRITE_COMMAND_BUFFERS_PER_POOL { + let new_encoder = command_allocator.acquire_encoder(device, queue).unwrap(); + Some(EncoderInFlight { + raw: mem::replace(&mut self.command_encoder, new_encoder), + cmd_buffers: mem::take(&mut self.executing_command_buffers), + }) + } else { + None + } + } + + pub fn activate(&mut self) -> &mut A::CommandEncoder { + if !self.is_active { + unsafe { + self.command_encoder + .begin_encoding(Some("(wgpu internal) PendingWrites")) + .unwrap(); + } + self.is_active = true; + } + &mut self.command_encoder + } + + pub fn deactivate(&mut self) { + if self.is_active { + unsafe { + self.command_encoder.discard_encoding(); + } + self.is_active = false; + } + } +} + +fn prepare_staging_buffer<A: HalApi>( + device: &Arc<Device<A>>, + size: wgt::BufferAddress, + instance_flags: wgt::InstanceFlags, +) -> Result<(StagingBuffer<A>, *mut u8), DeviceError> { + profiling::scope!("prepare_staging_buffer"); + let stage_desc = hal::BufferDescriptor { + label: hal_label(Some("(wgpu internal) Staging"), instance_flags), + size, + usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::COPY_SRC, + memory_flags: hal::MemoryFlags::TRANSIENT, + }; + + let buffer = unsafe { device.raw().create_buffer(&stage_desc)? 
}; + let mapping = unsafe { device.raw().map_buffer(&buffer, 0..size) }?; + + let staging_buffer = StagingBuffer { + raw: Mutex::new(Some(buffer)), + device: device.clone(), + size, + info: ResourceInfo::new("<StagingBuffer>"), + is_coherent: mapping.is_coherent, + }; + + Ok((staging_buffer, mapping.ptr.as_ptr())) +} + +impl<A: HalApi> StagingBuffer<A> { + unsafe fn flush(&self, device: &A::Device) -> Result<(), DeviceError> { + if !self.is_coherent { + unsafe { + device.flush_mapped_ranges( + self.raw.lock().as_ref().unwrap(), + iter::once(0..self.size), + ) + }; + } + unsafe { device.unmap_buffer(self.raw.lock().as_ref().unwrap())? }; + Ok(()) + } +} + +#[derive(Clone, Debug, Error)] +#[error("Queue is invalid")] +pub struct InvalidQueue; + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum QueueWriteError { + #[error(transparent)] + Queue(#[from] DeviceError), + #[error(transparent)] + Transfer(#[from] TransferError), + #[error(transparent)] + MemoryInitFailure(#[from] ClearError), +} + +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum QueueSubmitError { + #[error(transparent)] + Queue(#[from] DeviceError), + #[error("Buffer {0:?} is destroyed")] + DestroyedBuffer(id::BufferId), + #[error("Texture {0:?} is destroyed")] + DestroyedTexture(id::TextureId), + #[error(transparent)] + Unmap(#[from] BufferAccessError), + #[error("Buffer {0:?} is still mapped")] + BufferStillMapped(id::BufferId), + #[error("Surface output was dropped before the command buffer got submitted")] + SurfaceOutputDropped, + #[error("Surface was unconfigured before the command buffer got submitted")] + SurfaceUnconfigured, + #[error("GPU got stuck :(")] + StuckGpu, +} + +//TODO: move out common parts of write_xxx. + +impl Global { + pub fn queue_write_buffer<A: HalApi>( + &self, + queue_id: QueueId, + buffer_id: id::BufferId, + buffer_offset: wgt::BufferAddress, + data: &[u8], + ) -> Result<(), QueueWriteError> { + profiling::scope!("Queue::write_buffer"); + api_log!("Queue::write_buffer {buffer_id:?} {}bytes", data.len()); + + let hub = A::hub(self); + + let queue = hub + .queues + .get(queue_id) + .map_err(|_| DeviceError::InvalidQueueId)?; + + let device = queue.device.as_ref().unwrap(); + + let data_size = data.len() as wgt::BufferAddress; + + #[cfg(feature = "trace")] + if let Some(ref mut trace) = *device.trace.lock() { + let data_path = trace.make_binary("bin", data); + trace.add(Action::WriteBuffer { + id: buffer_id, + data: data_path, + range: buffer_offset..buffer_offset + data_size, + queued: true, + }); + } + + if data_size == 0 { + log::trace!("Ignoring write_buffer of size 0"); + return Ok(()); + } + + // Platform validation requires that the staging buffer always be + // freed, even if an error occurs. All paths from here must call + // `device.pending_writes.consume`. 
+ let (staging_buffer, staging_buffer_ptr) = + prepare_staging_buffer(device, data_size, device.instance_flags)?; + let mut pending_writes = device.pending_writes.lock(); + let pending_writes = pending_writes.as_mut().unwrap(); + + let stage_fid = hub.staging_buffers.request(); + let staging_buffer = stage_fid.init(staging_buffer); + + if let Err(flush_error) = unsafe { + profiling::scope!("copy"); + ptr::copy_nonoverlapping(data.as_ptr(), staging_buffer_ptr, data.len()); + staging_buffer.flush(device.raw()) + } { + pending_writes.consume(staging_buffer); + return Err(flush_error.into()); + } + + let result = self.queue_write_staging_buffer_impl( + device, + pending_writes, + &staging_buffer, + buffer_id, + buffer_offset, + ); + + pending_writes.consume(staging_buffer); + result + } + + pub fn queue_create_staging_buffer<A: HalApi>( + &self, + queue_id: QueueId, + buffer_size: wgt::BufferSize, + id_in: Option<id::StagingBufferId>, + ) -> Result<(id::StagingBufferId, *mut u8), QueueWriteError> { + profiling::scope!("Queue::create_staging_buffer"); + let hub = A::hub(self); + + let queue = hub + .queues + .get(queue_id) + .map_err(|_| DeviceError::InvalidQueueId)?; + + let device = queue.device.as_ref().unwrap(); + + let (staging_buffer, staging_buffer_ptr) = + prepare_staging_buffer(device, buffer_size.get(), device.instance_flags)?; + + let fid = hub.staging_buffers.prepare(id_in); + let (id, _) = fid.assign(staging_buffer); + resource_log!("Queue::create_staging_buffer {id:?}"); + + Ok((id, staging_buffer_ptr)) + } + + pub fn queue_write_staging_buffer<A: HalApi>( + &self, + queue_id: QueueId, + buffer_id: id::BufferId, + buffer_offset: wgt::BufferAddress, + staging_buffer_id: id::StagingBufferId, + ) -> Result<(), QueueWriteError> { + profiling::scope!("Queue::write_staging_buffer"); + let hub = A::hub(self); + + let queue = hub + .queues + .get(queue_id) + .map_err(|_| DeviceError::InvalidQueueId)?; + + let device = queue.device.as_ref().unwrap(); + + let staging_buffer = hub.staging_buffers.unregister(staging_buffer_id); + if staging_buffer.is_none() { + return Err(QueueWriteError::Transfer(TransferError::InvalidBuffer( + buffer_id, + ))); + } + let staging_buffer = staging_buffer.unwrap(); + let mut pending_writes = device.pending_writes.lock(); + let pending_writes = pending_writes.as_mut().unwrap(); + + // At this point, we have taken ownership of the staging_buffer from the + // user. Platform validation requires that the staging buffer always + // be freed, even if an error occurs. All paths from here must call + // `device.pending_writes.consume`. 
+ if let Err(flush_error) = unsafe { staging_buffer.flush(device.raw()) } { + pending_writes.consume(staging_buffer); + return Err(flush_error.into()); + } + + let result = self.queue_write_staging_buffer_impl( + device, + pending_writes, + &staging_buffer, + buffer_id, + buffer_offset, + ); + + pending_writes.consume(staging_buffer); + result + } + + pub fn queue_validate_write_buffer<A: HalApi>( + &self, + _queue_id: QueueId, + buffer_id: id::BufferId, + buffer_offset: u64, + buffer_size: u64, + ) -> Result<(), QueueWriteError> { + profiling::scope!("Queue::validate_write_buffer"); + let hub = A::hub(self); + + let buffer = hub + .buffers + .get(buffer_id) + .map_err(|_| TransferError::InvalidBuffer(buffer_id))?; + + self.queue_validate_write_buffer_impl(&buffer, buffer_id, buffer_offset, buffer_size)?; + + Ok(()) + } + + fn queue_validate_write_buffer_impl<A: HalApi>( + &self, + buffer: &Buffer<A>, + buffer_id: id::BufferId, + buffer_offset: u64, + buffer_size: u64, + ) -> Result<(), TransferError> { + if !buffer.usage.contains(wgt::BufferUsages::COPY_DST) { + return Err(TransferError::MissingCopyDstUsageFlag( + Some(buffer_id), + None, + )); + } + if buffer_size % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(TransferError::UnalignedCopySize(buffer_size)); + } + if buffer_offset % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(TransferError::UnalignedBufferOffset(buffer_offset)); + } + if buffer_offset + buffer_size > buffer.size { + return Err(TransferError::BufferOverrun { + start_offset: buffer_offset, + end_offset: buffer_offset + buffer_size, + buffer_size: buffer.size, + side: CopySide::Destination, + }); + } + + Ok(()) + } + + fn queue_write_staging_buffer_impl<A: HalApi>( + &self, + device: &Device<A>, + pending_writes: &mut PendingWrites<A>, + staging_buffer: &StagingBuffer<A>, + buffer_id: id::BufferId, + buffer_offset: u64, + ) -> Result<(), QueueWriteError> { + let hub = A::hub(self); + + let (dst, transition) = { + let buffer_guard = hub.buffers.read(); + let dst = buffer_guard + .get(buffer_id) + .map_err(|_| TransferError::InvalidBuffer(buffer_id))?; + let mut trackers = device.trackers.lock(); + trackers + .buffers + .set_single(dst, hal::BufferUses::COPY_DST) + .ok_or(TransferError::InvalidBuffer(buffer_id))? 
+ }; + let snatch_guard = device.snatchable_lock.read(); + let dst_raw = dst + .raw + .get(&snatch_guard) + .ok_or(TransferError::InvalidBuffer(buffer_id))?; + + if dst.device.as_info().id() != device.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + + let src_buffer_size = staging_buffer.size; + self.queue_validate_write_buffer_impl(&dst, buffer_id, buffer_offset, src_buffer_size)?; + + dst.info + .use_at(device.active_submission_index.load(Ordering::Relaxed) + 1); + + let region = wgt::BufferSize::new(src_buffer_size).map(|size| hal::BufferCopy { + src_offset: 0, + dst_offset: buffer_offset, + size, + }); + let inner_buffer = staging_buffer.raw.lock(); + let barriers = iter::once(hal::BufferBarrier { + buffer: inner_buffer.as_ref().unwrap(), + usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, + }) + .chain(transition.map(|pending| pending.into_hal(&dst, &snatch_guard))); + let encoder = pending_writes.activate(); + unsafe { + encoder.transition_buffers(barriers); + encoder.copy_buffer_to_buffer( + inner_buffer.as_ref().unwrap(), + dst_raw, + region.into_iter(), + ); + } + let dst = hub.buffers.get(buffer_id).unwrap(); + pending_writes.dst_buffers.insert(buffer_id, dst.clone()); + + // Ensure the overwritten bytes are marked as initialized so + // they don't need to be nulled prior to mapping or binding. + { + dst.initialization_status + .write() + .drain(buffer_offset..(buffer_offset + src_buffer_size)); + } + + Ok(()) + } + + pub fn queue_write_texture<A: HalApi>( + &self, + queue_id: QueueId, + destination: &ImageCopyTexture, + data: &[u8], + data_layout: &wgt::ImageDataLayout, + size: &wgt::Extent3d, + ) -> Result<(), QueueWriteError> { + profiling::scope!("Queue::write_texture"); + api_log!("Queue::write_texture {:?} {size:?}", destination.texture); + + let hub = A::hub(self); + + let queue = hub + .queues + .get(queue_id) + .map_err(|_| DeviceError::InvalidQueueId)?; + + let device = queue.device.as_ref().unwrap(); + + #[cfg(feature = "trace")] + if let Some(ref mut trace) = *device.trace.lock() { + let data_path = trace.make_binary("bin", data); + trace.add(Action::WriteTexture { + to: *destination, + data: data_path, + layout: *data_layout, + size: *size, + }); + } + + if size.width == 0 || size.height == 0 || size.depth_or_array_layers == 0 { + log::trace!("Ignoring write_texture of size 0"); + return Ok(()); + } + + let dst = hub + .textures + .get(destination.texture) + .map_err(|_| TransferError::InvalidTexture(destination.texture))?; + + if dst.device.as_info().id() != queue_id.transmute() { + return Err(DeviceError::WrongDevice.into()); + } + + if !dst.desc.usage.contains(wgt::TextureUsages::COPY_DST) { + return Err( + TransferError::MissingCopyDstUsageFlag(None, Some(destination.texture)).into(), + ); + } + + // Note: Doing the copy range validation early is important because ensures that the + // dimensions are not going to cause overflow in other parts of the validation. 
+ let (hal_copy_size, array_layer_count) = + validate_texture_copy_range(destination, &dst.desc, CopySide::Destination, size)?; + + let (selector, dst_base) = extract_texture_selector(destination, size, &dst)?; + + if !dst_base.aspect.is_one() { + return Err(TransferError::CopyAspectNotOne.into()); + } + + if !conv::is_valid_copy_dst_texture_format(dst.desc.format, destination.aspect) { + return Err(TransferError::CopyToForbiddenTextureFormat { + format: dst.desc.format, + aspect: destination.aspect, + } + .into()); + } + + // Note: `_source_bytes_per_array_layer` is ignored since we + // have a staging copy, and it can have a different value. + let (_, _source_bytes_per_array_layer) = validate_linear_texture_data( + data_layout, + dst.desc.format, + destination.aspect, + data.len() as wgt::BufferAddress, + CopySide::Source, + size, + false, + )?; + + if dst.desc.format.is_depth_stencil_format() { + device + .require_downlevel_flags(wgt::DownlevelFlags::DEPTH_TEXTURE_AND_BUFFER_COPIES) + .map_err(TransferError::from)?; + } + + let (block_width, block_height) = dst.desc.format.block_dimensions(); + let width_blocks = size.width / block_width; + let height_blocks = size.height / block_height; + + let block_rows_per_image = data_layout.rows_per_image.unwrap_or( + // doesn't really matter because we need this only if we copy + // more than one layer, and then we validate for this being not + // None + height_blocks, + ); + + let block_size = dst + .desc + .format + .block_copy_size(Some(destination.aspect)) + .unwrap(); + let bytes_per_row_alignment = + get_lowest_common_denom(device.alignments.buffer_copy_pitch.get() as u32, block_size); + let stage_bytes_per_row = + wgt::math::align_to(block_size * width_blocks, bytes_per_row_alignment); + + let block_rows_in_copy = + (size.depth_or_array_layers - 1) * block_rows_per_image + height_blocks; + let stage_size = stage_bytes_per_row as u64 * block_rows_in_copy as u64; + + let mut pending_writes = device.pending_writes.lock(); + let pending_writes = pending_writes.as_mut().unwrap(); + let encoder = pending_writes.activate(); + + // If the copy does not fully cover the layers, we need to initialize to + // zero *first* as we don't keep track of partial texture layer inits. + // + // Strictly speaking we only need to clear the areas of a layer + // untouched, but this would get increasingly messy. 
+ let init_layer_range = if dst.desc.dimension == wgt::TextureDimension::D3 { + // volume textures don't have a layer range as array volumes aren't supported + 0..1 + } else { + destination.origin.z..destination.origin.z + size.depth_or_array_layers + }; + let mut dst_initialization_status = dst.initialization_status.write(); + if dst_initialization_status.mips[destination.mip_level as usize] + .check(init_layer_range.clone()) + .is_some() + { + if has_copy_partial_init_tracker_coverage(size, destination.mip_level, &dst.desc) { + for layer_range in dst_initialization_status.mips[destination.mip_level as usize] + .drain(init_layer_range) + .collect::<Vec<std::ops::Range<u32>>>() + { + let mut trackers = device.trackers.lock(); + crate::command::clear_texture( + &dst, + TextureInitRange { + mip_range: destination.mip_level..(destination.mip_level + 1), + layer_range, + }, + encoder, + &mut trackers.textures, + &device.alignments, + device.zero_buffer.as_ref().unwrap(), + ) + .map_err(QueueWriteError::from)?; + } + } else { + dst_initialization_status.mips[destination.mip_level as usize] + .drain(init_layer_range); + } + } + + let snatch_guard = device.snatchable_lock.read(); + + // Re-get `dst` immutably here, so that the mutable borrow of the + // `texture_guard.get` above ends in time for the `clear_texture` + // call above. Since we've held `texture_guard` the whole time, we know + // the texture hasn't gone away in the mean time, so we can unwrap. + let dst = hub.textures.get(destination.texture).unwrap(); + dst.info + .use_at(device.active_submission_index.load(Ordering::Relaxed) + 1); + + let dst_raw = dst + .raw(&snatch_guard) + .ok_or(TransferError::InvalidTexture(destination.texture))?; + + let bytes_per_row = data_layout + .bytes_per_row + .unwrap_or(width_blocks * block_size); + + // Platform validation requires that the staging buffer always be + // freed, even if an error occurs. All paths from here must call + // `device.pending_writes.consume`. + let (staging_buffer, staging_buffer_ptr) = + prepare_staging_buffer(device, stage_size, device.instance_flags)?; + + let stage_fid = hub.staging_buffers.request(); + let staging_buffer = stage_fid.init(staging_buffer); + + if stage_bytes_per_row == bytes_per_row { + profiling::scope!("copy aligned"); + // Fast path if the data is already being aligned optimally. + unsafe { + ptr::copy_nonoverlapping( + data.as_ptr().offset(data_layout.offset as isize), + staging_buffer_ptr, + stage_size as usize, + ); + } + } else { + profiling::scope!("copy chunked"); + // Copy row by row into the optimal alignment. 
+ let copy_bytes_per_row = stage_bytes_per_row.min(bytes_per_row) as usize; + for layer in 0..size.depth_or_array_layers { + let rows_offset = layer * block_rows_per_image; + for row in 0..height_blocks { + unsafe { + ptr::copy_nonoverlapping( + data.as_ptr().offset( + data_layout.offset as isize + + (rows_offset + row) as isize * bytes_per_row as isize, + ), + staging_buffer_ptr.offset( + (rows_offset + row) as isize * stage_bytes_per_row as isize, + ), + copy_bytes_per_row, + ); + } + } + } + } + + if let Err(e) = unsafe { staging_buffer.flush(device.raw()) } { + pending_writes.consume(staging_buffer); + return Err(e.into()); + } + + let regions = (0..array_layer_count).map(|rel_array_layer| { + let mut texture_base = dst_base.clone(); + texture_base.array_layer += rel_array_layer; + hal::BufferTextureCopy { + buffer_layout: wgt::ImageDataLayout { + offset: rel_array_layer as u64 + * block_rows_per_image as u64 + * stage_bytes_per_row as u64, + bytes_per_row: Some(stage_bytes_per_row), + rows_per_image: Some(block_rows_per_image), + }, + texture_base, + size: hal_copy_size, + } + }); + + { + let inner_buffer = staging_buffer.raw.lock(); + let barrier = hal::BufferBarrier { + buffer: inner_buffer.as_ref().unwrap(), + usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, + }; + + let mut trackers = device.trackers.lock(); + let transition = trackers + .textures + .set_single(&dst, selector, hal::TextureUses::COPY_DST) + .ok_or(TransferError::InvalidTexture(destination.texture))?; + unsafe { + encoder.transition_textures(transition.map(|pending| pending.into_hal(dst_raw))); + encoder.transition_buffers(iter::once(barrier)); + encoder.copy_buffer_to_texture(inner_buffer.as_ref().unwrap(), dst_raw, regions); + } + } + + pending_writes.consume(staging_buffer); + pending_writes + .dst_textures + .insert(destination.texture, dst.clone()); + + Ok(()) + } + + #[cfg(webgl)] + pub fn queue_copy_external_image_to_texture<A: HalApi>( + &self, + queue_id: QueueId, + source: &wgt::ImageCopyExternalImage, + destination: crate::command::ImageCopyTextureTagged, + size: wgt::Extent3d, + ) -> Result<(), QueueWriteError> { + profiling::scope!("Queue::copy_external_image_to_texture"); + + let hub = A::hub(self); + + let queue = hub + .queues + .get(queue_id) + .map_err(|_| DeviceError::InvalidQueueId)?; + + let device = queue.device.as_ref().unwrap(); + + if size.width == 0 || size.height == 0 || size.depth_or_array_layers == 0 { + log::trace!("Ignoring write_texture of size 0"); + return Ok(()); + } + + let mut needs_flag = false; + needs_flag |= matches!(source.source, wgt::ExternalImageSource::OffscreenCanvas(_)); + needs_flag |= source.origin != wgt::Origin2d::ZERO; + needs_flag |= destination.color_space != wgt::PredefinedColorSpace::Srgb; + #[allow(clippy::bool_comparison)] + if matches!(source.source, wgt::ExternalImageSource::ImageBitmap(_)) { + needs_flag |= source.flip_y != false; + needs_flag |= destination.premultiplied_alpha != false; + } + + if needs_flag { + device + .require_downlevel_flags(wgt::DownlevelFlags::UNRESTRICTED_EXTERNAL_TEXTURE_COPIES) + .map_err(TransferError::from)?; + } + + let src_width = source.source.width(); + let src_height = source.source.height(); + + let dst = hub.textures.get(destination.texture).unwrap(); + + if !conv::is_valid_external_image_copy_dst_texture_format(dst.desc.format) { + return Err( + TransferError::ExternalCopyToForbiddenTextureFormat(dst.desc.format).into(), + ); + } + if dst.desc.dimension != wgt::TextureDimension::D2 { + return 
Err(TransferError::InvalidDimensionExternal(destination.texture).into()); + } + if !dst.desc.usage.contains(wgt::TextureUsages::COPY_DST) { + return Err( + TransferError::MissingCopyDstUsageFlag(None, Some(destination.texture)).into(), + ); + } + if !dst + .desc + .usage + .contains(wgt::TextureUsages::RENDER_ATTACHMENT) + { + return Err( + TransferError::MissingRenderAttachmentUsageFlag(destination.texture).into(), + ); + } + if dst.desc.sample_count != 1 { + return Err(TransferError::InvalidSampleCount { + sample_count: dst.desc.sample_count, + } + .into()); + } + + if source.origin.x + size.width > src_width { + return Err(TransferError::TextureOverrun { + start_offset: source.origin.x, + end_offset: source.origin.x + size.width, + texture_size: src_width, + dimension: crate::resource::TextureErrorDimension::X, + side: CopySide::Source, + } + .into()); + } + if source.origin.y + size.height > src_height { + return Err(TransferError::TextureOverrun { + start_offset: source.origin.y, + end_offset: source.origin.y + size.height, + texture_size: src_height, + dimension: crate::resource::TextureErrorDimension::Y, + side: CopySide::Source, + } + .into()); + } + if size.depth_or_array_layers != 1 { + return Err(TransferError::TextureOverrun { + start_offset: 0, + end_offset: size.depth_or_array_layers, + texture_size: 1, + dimension: crate::resource::TextureErrorDimension::Z, + side: CopySide::Source, + } + .into()); + } + + // Note: Doing the copy range validation early is important because ensures that the + // dimensions are not going to cause overflow in other parts of the validation. + let (hal_copy_size, _) = validate_texture_copy_range( + &destination.to_untagged(), + &dst.desc, + CopySide::Destination, + &size, + )?; + + let (selector, dst_base) = + extract_texture_selector(&destination.to_untagged(), &size, &dst)?; + + let mut pending_writes = device.pending_writes.lock(); + let encoder = pending_writes.as_mut().unwrap().activate(); + + // If the copy does not fully cover the layers, we need to initialize to + // zero *first* as we don't keep track of partial texture layer inits. + // + // Strictly speaking we only need to clear the areas of a layer + // untouched, but this would get increasingly messy. 
+ let init_layer_range = if dst.desc.dimension == wgt::TextureDimension::D3 { + // volume textures don't have a layer range as array volumes aren't supported + 0..1 + } else { + destination.origin.z..destination.origin.z + size.depth_or_array_layers + }; + let mut dst_initialization_status = dst.initialization_status.write(); + if dst_initialization_status.mips[destination.mip_level as usize] + .check(init_layer_range.clone()) + .is_some() + { + if has_copy_partial_init_tracker_coverage(&size, destination.mip_level, &dst.desc) { + for layer_range in dst_initialization_status.mips[destination.mip_level as usize] + .drain(init_layer_range) + .collect::<Vec<std::ops::Range<u32>>>() + { + let mut trackers = device.trackers.lock(); + crate::command::clear_texture( + &dst, + TextureInitRange { + mip_range: destination.mip_level..(destination.mip_level + 1), + layer_range, + }, + encoder, + &mut trackers.textures, + &device.alignments, + device.zero_buffer.as_ref().unwrap(), + ) + .map_err(QueueWriteError::from)?; + } + } else { + dst_initialization_status.mips[destination.mip_level as usize] + .drain(init_layer_range); + } + } + dst.info + .use_at(device.active_submission_index.load(Ordering::Relaxed) + 1); + + let snatch_guard = device.snatchable_lock.read(); + let dst_raw = dst + .raw(&snatch_guard) + .ok_or(TransferError::InvalidTexture(destination.texture))?; + + let regions = hal::TextureCopy { + src_base: hal::TextureCopyBase { + mip_level: 0, + array_layer: 0, + origin: source.origin.to_3d(0), + aspect: hal::FormatAspects::COLOR, + }, + dst_base, + size: hal_copy_size, + }; + + unsafe { + let mut trackers = device.trackers.lock(); + let transitions = trackers + .textures + .set_single(&dst, selector, hal::TextureUses::COPY_DST) + .ok_or(TransferError::InvalidTexture(destination.texture))?; + encoder.transition_textures(transitions.map(|pending| pending.into_hal(dst_raw))); + encoder.copy_external_image_to_texture( + source, + dst_raw, + destination.premultiplied_alpha, + iter::once(regions), + ); + } + + Ok(()) + } + + pub fn queue_submit<A: HalApi>( + &self, + queue_id: QueueId, + command_buffer_ids: &[id::CommandBufferId], + ) -> Result<WrappedSubmissionIndex, QueueSubmitError> { + profiling::scope!("Queue::submit"); + api_log!("Queue::submit {queue_id:?}"); + + let (submit_index, callbacks) = { + let hub = A::hub(self); + + let queue = hub + .queues + .get(queue_id) + .map_err(|_| DeviceError::InvalidQueueId)?; + + let device = queue.device.as_ref().unwrap(); + + let mut fence = device.fence.write(); + let fence = fence.as_mut().unwrap(); + let submit_index = device + .active_submission_index + .fetch_add(1, Ordering::Relaxed) + + 1; + let mut active_executions = Vec::new(); + + let mut used_surface_textures = track::TextureUsageScope::new(); + + let snatch_guard = device.snatchable_lock.read(); + + let mut submit_surface_textures_owned = SmallVec::<[_; 2]>::new(); + + { + let mut command_buffer_guard = hub.command_buffers.write(); + + if !command_buffer_ids.is_empty() { + profiling::scope!("prepare"); + + //TODO: if multiple command buffers are submitted, we can re-use the last + // native command buffer of the previous chain instead of always creating + // a temporary one, since the chains are not finished. 
+ let mut temp_suspected = device.temp_suspected.lock(); + { + let mut suspected = temp_suspected.replace(ResourceMaps::new()).unwrap(); + suspected.clear(); + } + + // finish all the command buffers first + for &cmb_id in command_buffer_ids { + // we reset the used surface textures every time we use + // it, so make sure to set_size on it. + used_surface_textures.set_size(hub.textures.read().len()); + + #[allow(unused_mut)] + let mut cmdbuf = match command_buffer_guard.replace_with_error(cmb_id) { + Ok(cmdbuf) => cmdbuf, + Err(_) => continue, + }; + + if cmdbuf.device.as_info().id() != queue_id.transmute() { + return Err(DeviceError::WrongDevice.into()); + } + + #[cfg(feature = "trace")] + if let Some(ref mut trace) = *device.trace.lock() { + trace.add(Action::Submit( + submit_index, + cmdbuf + .data + .lock() + .as_mut() + .unwrap() + .commands + .take() + .unwrap(), + )); + } + if !cmdbuf.is_finished() { + if let Some(cmdbuf) = Arc::into_inner(cmdbuf) { + device.destroy_command_buffer(cmdbuf); + } else { + panic!( + "Command buffer cannot be destroyed because is still in use" + ); + } + continue; + } + + // optimize the tracked states + // cmdbuf.trackers.optimize(); + { + let cmd_buf_data = cmdbuf.data.lock(); + let cmd_buf_trackers = &cmd_buf_data.as_ref().unwrap().trackers; + + // update submission IDs + for buffer in cmd_buf_trackers.buffers.used_resources() { + let id = buffer.info.id(); + let raw_buf = match buffer.raw.get(&snatch_guard) { + Some(raw) => raw, + None => { + return Err(QueueSubmitError::DestroyedBuffer(id)); + } + }; + buffer.info.use_at(submit_index); + if buffer.is_unique() { + if let BufferMapState::Active { .. } = *buffer.map_state.lock() + { + log::warn!("Dropped buffer has a pending mapping."); + unsafe { device.raw().unmap_buffer(raw_buf) } + .map_err(DeviceError::from)?; + } + temp_suspected + .as_mut() + .unwrap() + .buffers + .insert(id, buffer.clone()); + } else { + match *buffer.map_state.lock() { + BufferMapState::Idle => (), + _ => return Err(QueueSubmitError::BufferStillMapped(id)), + } + } + } + for texture in cmd_buf_trackers.textures.used_resources() { + let id = texture.info.id(); + let should_extend = match texture.inner.get(&snatch_guard) { + None => { + return Err(QueueSubmitError::DestroyedTexture(id)); + } + Some(TextureInner::Native { .. }) => false, + Some(TextureInner::Surface { + ref has_work, + ref raw, + .. + }) => { + has_work.store(true, Ordering::Relaxed); + + if raw.is_some() { + submit_surface_textures_owned.push(texture.clone()); + } + + true + } + }; + texture.info.use_at(submit_index); + if texture.is_unique() { + temp_suspected + .as_mut() + .unwrap() + .textures + .insert(id, texture.clone()); + } + if should_extend { + unsafe { + used_surface_textures + .merge_single(&texture, None, hal::TextureUses::PRESENT) + .unwrap(); + }; + } + } + for texture_view in cmd_buf_trackers.views.used_resources() { + texture_view.info.use_at(submit_index); + if texture_view.is_unique() { + temp_suspected + .as_mut() + .unwrap() + .texture_views + .insert(texture_view.as_info().id(), texture_view.clone()); + } + } + { + for bg in cmd_buf_trackers.bind_groups.used_resources() { + bg.info.use_at(submit_index); + // We need to update the submission indices for the contained + // state-less (!) resources as well, so that they don't get + // deleted too early if the parent bind group goes out of scope. 
+ for view in bg.used.views.used_resources() { + view.info.use_at(submit_index); + } + for sampler in bg.used.samplers.used_resources() { + sampler.info.use_at(submit_index); + } + if bg.is_unique() { + temp_suspected + .as_mut() + .unwrap() + .bind_groups + .insert(bg.as_info().id(), bg.clone()); + } + } + } + // assert!(cmd_buf_trackers.samplers.is_empty()); + for compute_pipeline in + cmd_buf_trackers.compute_pipelines.used_resources() + { + compute_pipeline.info.use_at(submit_index); + if compute_pipeline.is_unique() { + temp_suspected.as_mut().unwrap().compute_pipelines.insert( + compute_pipeline.as_info().id(), + compute_pipeline.clone(), + ); + } + } + for render_pipeline in + cmd_buf_trackers.render_pipelines.used_resources() + { + render_pipeline.info.use_at(submit_index); + if render_pipeline.is_unique() { + temp_suspected.as_mut().unwrap().render_pipelines.insert( + render_pipeline.as_info().id(), + render_pipeline.clone(), + ); + } + } + for query_set in cmd_buf_trackers.query_sets.used_resources() { + query_set.info.use_at(submit_index); + if query_set.is_unique() { + temp_suspected + .as_mut() + .unwrap() + .query_sets + .insert(query_set.as_info().id(), query_set.clone()); + } + } + for bundle in cmd_buf_trackers.bundles.used_resources() { + bundle.info.use_at(submit_index); + // We need to update the submission indices for the contained + // state-less (!) resources as well, excluding the bind groups. + // They don't get deleted too early if the bundle goes out of scope. + for render_pipeline in + bundle.used.render_pipelines.read().used_resources() + { + render_pipeline.info.use_at(submit_index); + } + for query_set in bundle.used.query_sets.read().used_resources() { + query_set.info.use_at(submit_index); + } + if bundle.is_unique() { + temp_suspected + .as_mut() + .unwrap() + .render_bundles + .insert(bundle.as_info().id(), bundle.clone()); + } + } + } + let mut baked = cmdbuf.from_arc_into_baked(); + // execute resource transitions + unsafe { + baked + .encoder + .begin_encoding(hal_label( + Some("(wgpu internal) Transit"), + device.instance_flags, + )) + .map_err(DeviceError::from)? + }; + log::trace!("Stitching command buffer {:?} before submission", cmb_id); + + //Note: locking the trackers has to be done after the storages + let mut trackers = device.trackers.lock(); + baked + .initialize_buffer_memory(&mut *trackers) + .map_err(|err| QueueSubmitError::DestroyedBuffer(err.0))?; + baked + .initialize_texture_memory(&mut *trackers, device) + .map_err(|err| QueueSubmitError::DestroyedTexture(err.0))?; + //Note: stateless trackers are not merged: + // device already knows these resources exist. + CommandBuffer::insert_barriers_from_tracker( + &mut baked.encoder, + &mut *trackers, + &baked.trackers, + &snatch_guard, + ); + + let transit = unsafe { baked.encoder.end_encoding().unwrap() }; + baked.list.insert(0, transit); + + // Transition surface textures into `Present` state. + // Note: we could technically do it after all of the command buffers, + // but here we have a command encoder by hand, so it's easier to use it. + if !used_surface_textures.is_empty() { + unsafe { + baked + .encoder + .begin_encoding(hal_label( + Some("(wgpu internal) Present"), + device.instance_flags, + )) + .map_err(DeviceError::from)? 
+ }; + trackers + .textures + .set_from_usage_scope(&used_surface_textures); + let (transitions, textures) = + trackers.textures.drain_transitions(&snatch_guard); + let texture_barriers = transitions + .into_iter() + .enumerate() + .map(|(i, p)| p.into_hal(textures[i].unwrap().raw().unwrap())); + let present = unsafe { + baked.encoder.transition_textures(texture_barriers); + baked.encoder.end_encoding().unwrap() + }; + baked.list.push(present); + used_surface_textures = track::TextureUsageScope::new(); + } + + // done + active_executions.push(EncoderInFlight { + raw: baked.encoder, + cmd_buffers: baked.list, + }); + } + + log::trace!("Device after submission {}", submit_index); + } + } + + let mut pending_writes = device.pending_writes.lock(); + let pending_writes = pending_writes.as_mut().unwrap(); + + { + used_surface_textures.set_size(hub.textures.read().len()); + for (&id, texture) in pending_writes.dst_textures.iter() { + match texture.inner.get(&snatch_guard) { + None => { + return Err(QueueSubmitError::DestroyedTexture(id)); + } + Some(TextureInner::Native { .. }) => {} + Some(TextureInner::Surface { + ref has_work, + ref raw, + .. + }) => { + has_work.store(true, Ordering::Relaxed); + + if raw.is_some() { + submit_surface_textures_owned.push(texture.clone()); + } + + unsafe { + used_surface_textures + .merge_single(texture, None, hal::TextureUses::PRESENT) + .unwrap() + }; + } + } + } + + if !used_surface_textures.is_empty() { + let mut trackers = device.trackers.lock(); + + trackers + .textures + .set_from_usage_scope(&used_surface_textures); + let (transitions, textures) = + trackers.textures.drain_transitions(&snatch_guard); + let texture_barriers = transitions + .into_iter() + .enumerate() + .map(|(i, p)| p.into_hal(textures[i].unwrap().raw().unwrap())); + unsafe { + pending_writes + .command_encoder + .transition_textures(texture_barriers); + }; + } + } + + let refs = pending_writes + .pre_submit()? + .into_iter() + .chain( + active_executions + .iter() + .flat_map(|pool_execution| pool_execution.cmd_buffers.iter()), + ) + .collect::<Vec<_>>(); + + let mut submit_surface_textures = + SmallVec::<[_; 2]>::with_capacity(submit_surface_textures_owned.len()); + + for texture in &submit_surface_textures_owned { + submit_surface_textures.extend(match texture.inner.get(&snatch_guard) { + Some(TextureInner::Surface { raw, .. }) => raw.as_ref(), + _ => None, + }); + } + + unsafe { + queue + .raw + .as_ref() + .unwrap() + .submit(&refs, &submit_surface_textures, Some((fence, submit_index))) + .map_err(DeviceError::from)?; + } + + profiling::scope!("cleanup"); + if let Some(pending_execution) = pending_writes.post_submit( + device.command_allocator.lock().as_mut().unwrap(), + device.raw(), + queue.raw.as_ref().unwrap(), + ) { + active_executions.push(pending_execution); + } + + // this will register the new submission to the life time tracker + let mut pending_write_resources = mem::take(&mut pending_writes.temp_resources); + device.lock_life().track_submission( + submit_index, + pending_write_resources.drain(..), + active_executions, + ); + + // This will schedule destruction of all resources that are no longer needed + // by the user but used in the command stream, among other things. 
+ let (closures, _) = match device.maintain(fence, wgt::Maintain::Poll) { + Ok(closures) => closures, + Err(WaitIdleError::Device(err)) => return Err(QueueSubmitError::Queue(err)), + Err(WaitIdleError::StuckGpu) => return Err(QueueSubmitError::StuckGpu), + Err(WaitIdleError::WrongSubmissionIndex(..)) => unreachable!(), + }; + + // pending_write_resources has been drained, so it's empty, but we + // want to retain its heap allocation. + pending_writes.temp_resources = pending_write_resources; + device.lock_life().post_submit(); + + (submit_index, closures) + }; + + // the closures should execute with nothing locked! + callbacks.fire(); + + Ok(WrappedSubmissionIndex { + queue_id, + index: submit_index, + }) + } + + pub fn queue_get_timestamp_period<A: HalApi>( + &self, + queue_id: QueueId, + ) -> Result<f32, InvalidQueue> { + let hub = A::hub(self); + match hub.queues.get(queue_id) { + Ok(queue) => Ok(unsafe { queue.raw.as_ref().unwrap().get_timestamp_period() }), + Err(_) => Err(InvalidQueue), + } + } + + pub fn queue_on_submitted_work_done<A: HalApi>( + &self, + queue_id: QueueId, + closure: SubmittedWorkDoneClosure, + ) -> Result<(), InvalidQueue> { + api_log!("Queue::on_submitted_work_done {queue_id:?}"); + + //TODO: flush pending writes + let hub = A::hub(self); + match hub.queues.get(queue_id) { + Ok(queue) => queue + .device + .as_ref() + .unwrap() + .lock_life() + .add_work_done_closure(closure), + Err(_) => return Err(InvalidQueue), + } + Ok(()) + } +} |
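For orientation, the sketch below shows how the queue paths added in this file (`queue_write_buffer`, `queue_submit`, `queue_on_submitted_work_done`) are typically exercised through the public `wgpu` crate, which forwards into these `wgpu-core` entry points. This is an illustrative assumption about a typical caller using wgpu's 0.19-era public API, not part of this commit; the buffer size, labels, and empty command buffer are made up for the example.

```rust
// Illustrative only: drives the queue paths added in this file via the
// public `wgpu` API, not via wgpu-core's Global/hub interface directly.
fn main() {
    pollster::block_on(run());
}

async fn run() {
    let instance = wgpu::Instance::default();
    let adapter = instance
        .request_adapter(&wgpu::RequestAdapterOptions::default())
        .await
        .expect("no suitable GPU adapter");
    let (device, queue) = adapter
        .request_device(&wgpu::DeviceDescriptor::default(), None)
        .await
        .expect("failed to create device");

    // A COPY_DST buffer that Queue::write_buffer (queue_write_buffer in this
    // file) fills through an internal staging buffer owned by PendingWrites.
    let buffer = device.create_buffer(&wgpu::BufferDescriptor {
        label: Some("example buffer"),
        size: 256,
        usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::COPY_SRC,
        mapped_at_creation: false,
    });

    // The write is deferred: the copy is recorded into the device's
    // pending-writes encoder and submitted ahead of the next queue_submit.
    queue.write_buffer(&buffer, 0, &[0u8; 256]);

    // An (empty) user command buffer; submitting it also flushes the
    // pending writes recorded above.
    let encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
    queue.submit(Some(encoder.finish()));

    // Corresponds to queue_on_submitted_work_done: the closure fires once
    // the submission's fence value has been reached.
    queue.on_submitted_work_done(|| println!("submission finished"));
    device.poll(wgpu::Maintain::Wait);
}
```

The ordering in this sketch mirrors the `PendingWrites` doc comment in the diff: commands accumulated by `write_buffer`/`write_texture` are submitted to the queue ahead of the user's own command buffers on the next submit.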