Diffstat (limited to 'third_party/rust/gpu-alloc')
-rw-r--r-- | third_party/rust/gpu-alloc/.cargo-checksum.json | 1
-rw-r--r-- | third_party/rust/gpu-alloc/Cargo.toml | 21
-rw-r--r-- | third_party/rust/gpu-alloc/src/allocator.rs | 498
-rw-r--r-- | third_party/rust/gpu-alloc/src/block.rs | 351
-rw-r--r-- | third_party/rust/gpu-alloc/src/buddy.rs | 467
-rw-r--r-- | third_party/rust/gpu-alloc/src/config.rs | 72
-rw-r--r-- | third_party/rust/gpu-alloc/src/error.rs | 116
-rw-r--r-- | third_party/rust/gpu-alloc/src/heap.rs | 36
-rw-r--r-- | third_party/rust/gpu-alloc/src/lib.rs | 92
-rw-r--r-- | third_party/rust/gpu-alloc/src/linear.rs | 307
-rw-r--r-- | third_party/rust/gpu-alloc/src/slab.rs | 97
-rw-r--r-- | third_party/rust/gpu-alloc/src/usage.rs | 153 |
12 files changed, 2211 insertions, 0 deletions
diff --git a/third_party/rust/gpu-alloc/.cargo-checksum.json b/third_party/rust/gpu-alloc/.cargo-checksum.json new file mode 100644 index 0000000000..1b7e301c6c --- /dev/null +++ b/third_party/rust/gpu-alloc/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"2ab88872a71201c9075c960d0c4c9410127799e0d5b4f098604938d096b922c6","src/allocator.rs":"6b39cd8aff03c2b9ced102352674a14b949c092cdb8f57c138e5c1dca0cd1a49","src/block.rs":"bb631a33234a083fb8db68ad09aae650681ae3eb3a8131bf3e38f60a867ff959","src/buddy.rs":"3bac425aebd07ba9b2e7aee95b31d17fc019cd9c6352b8c921453587ec533b4b","src/config.rs":"e8d072527babbcd622066fd279ec7c0aa063c48b0d025d056df111990fdae9b3","src/error.rs":"75bc7cb021b5db5b7bfcd45567bb2e2c65916ee1ab0dcacd736232c4a539b02f","src/heap.rs":"010c310039a765b7bd9f4c31dc5461ca4881686e826fefb3a6ce1f8457a17ea9","src/lib.rs":"1dc1615508dc17f87fb7ec855aa6634170e34b4655bdc70eff7a7307c8c9b518","src/linear.rs":"d3a132201fde09adbf168a29b52de44b9dc1cf7bd757ef33df47fa795b2e9d9e","src/slab.rs":"9e7dfedd5b6e9f9fdcbcb596ed6561e2b38ceb87a8c535057110053e64fc5bc7","src/usage.rs":"bbbedfb2a17e22b2eb2d2e4e1dfe37fd4c89a038a1b9467a4d1e5634304803ea"},"package":null}
\ No newline at end of file diff --git a/third_party/rust/gpu-alloc/Cargo.toml b/third_party/rust/gpu-alloc/Cargo.toml new file mode 100644 index 0000000000..9376be4394 --- /dev/null +++ b/third_party/rust/gpu-alloc/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "gpu-alloc" +version = "0.2.1" +authors = ["Zakarum <zakarumych@ya.ru>"] +edition = "2018" +description = "Implementation agnostic memory allocator for Vulkan like APIs" +keywords = ["gpu", "vulkan", "allocation", "no-std"] +license = "MIT OR Apache-2.0" +documentation = "https://docs.rs/gpu-alloc-types/0.2.1" +homepage = "https://github.com/zakarumych/gpu-alloc" +repository = "https://github.com/zakarumych/gpu-alloc" + +[features] +std = [] +default = ["std"] + +[dependencies] +gpu-alloc-types = { path = "../types", version = "0.1" } +tracing = { version = "0.1", optional = true, default-features = false } +bitflags = { version = "1.2", default-features = false } +serde = { version = "1.0", optional = true, default-features = false, features = ["derive"] } diff --git a/third_party/rust/gpu-alloc/src/allocator.rs b/third_party/rust/gpu-alloc/src/allocator.rs new file mode 100644 index 0000000000..a04e7c4fb4 --- /dev/null +++ b/third_party/rust/gpu-alloc/src/allocator.rs @@ -0,0 +1,498 @@ +use { + crate::{ + align_down, + block::{MemoryBlock, MemoryBlockFlavor}, + buddy::{BuddyAllocator, BuddyBlock}, + config::Config, + error::AllocationError, + heap::Heap, + linear::{LinearAllocator, LinearBlock}, + usage::{MemoryForUsage, UsageFlags}, + MemoryBounds, Request, + }, + alloc::boxed::Box, + core::convert::TryFrom as _, + gpu_alloc_types::{ + AllocationFlags, DeviceProperties, MemoryDevice, MemoryPropertyFlags, MemoryType, + OutOfMemory, + }, +}; + +/// Memory allocator for Vulkan-like APIs. +#[derive(Debug)] +pub struct GpuAllocator<M> { + dedicated_treshold: u64, + preferred_dedicated_treshold: u64, + transient_dedicated_treshold: u64, + max_memory_allocation_size: u64, + memory_for_usage: MemoryForUsage, + memory_types: Box<[MemoryType]>, + memory_heaps: Box<[Heap]>, + max_allocation_count: u32, + allocations_remains: u32, + non_coherent_atom_mask: u64, + linear_chunk: u64, + minimal_buddy_size: u64, + initial_buddy_dedicated_size: u64, + buffer_device_address: bool, + + linear_allocators: Box<[Option<LinearAllocator<M>>]>, + buddy_allocators: Box<[Option<BuddyAllocator<M>>]>, +} + +/// Hints for allocator to decide on allocation strategy. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum Dedicated { + /// Allocation directly from device.\ + /// Very slow. + /// Count of allocations is limited.\ + /// Use with caution.\ + /// Must be used if resource has to be bound to dedicated memory object. + Required, + + /// Hint for allocator that dedicated memory object is preferred.\ + /// Should be used if it is known that resource placed in dedicated memory object + /// would allow for better performance.\ + /// Implementation is allowed to return block to shared memory object. + Preferred, +} + +impl<M> GpuAllocator<M> +where + M: MemoryBounds + 'static, +{ + /// Creates new instance of `GpuAllocator`. + /// Provided `DeviceProperties` should match propertices of `MemoryDevice` that will be used + /// with created `GpuAllocator` instance. 
+ #[cfg_attr(feature = "tracing", tracing::instrument)] + pub fn new(config: Config, props: DeviceProperties<'_>) -> Self { + assert!( + props.non_coherent_atom_size.is_power_of_two(), + "`non_coherent_atom_size` must be power of two" + ); + + assert!( + isize::try_from(props.non_coherent_atom_size).is_ok(), + "`non_coherent_atom_size` must fit host address space" + ); + + GpuAllocator { + dedicated_treshold: config + .dedicated_treshold + .max(props.max_memory_allocation_size), + preferred_dedicated_treshold: config + .preferred_dedicated_treshold + .min(config.dedicated_treshold) + .max(props.max_memory_allocation_size), + + transient_dedicated_treshold: config + .transient_dedicated_treshold + .max(config.dedicated_treshold) + .max(props.max_memory_allocation_size), + + max_memory_allocation_size: props.max_memory_allocation_size, + + memory_for_usage: MemoryForUsage::new(props.memory_types.as_ref()), + + memory_types: props.memory_types.as_ref().iter().copied().collect(), + memory_heaps: props + .memory_heaps + .as_ref() + .iter() + .map(|heap| Heap::new(heap.size)) + .collect(), + + buffer_device_address: props.buffer_device_address, + + max_allocation_count: props.max_memory_allocation_count, + allocations_remains: props.max_memory_allocation_count, + non_coherent_atom_mask: props.non_coherent_atom_size - 1, + + linear_chunk: config.linear_chunk, + minimal_buddy_size: config.minimal_buddy_size, + initial_buddy_dedicated_size: config.initial_buddy_dedicated_size, + + linear_allocators: props.memory_types.as_ref().iter().map(|_| None).collect(), + buddy_allocators: props.memory_types.as_ref().iter().map(|_| None).collect(), + } + } + + /// Allocates memory block from specified `device` according to the `request`. + /// + /// # Safety + /// + /// * `device` must be one with `DeviceProperties` that were provided to create this `GpuAllocator` instance. + /// * Same `device` instance must be used for all interactions with one `GpuAllocator` instance + /// and memory blocks allocated from it. + #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))] + pub unsafe fn alloc( + &mut self, + device: &impl MemoryDevice<M>, + request: Request, + ) -> Result<MemoryBlock<M>, AllocationError> + where + M: Clone, + { + self.alloc_internal(device, request, None) + } + + /// Allocates memory block from specified `device` according to the `request`. + /// This function allows user to force specific allocation strategy. + /// Improper use can lead to suboptimal performance or too large overhead. + /// Prefer `GpuAllocator::alloc` if doubt. + /// + /// # Safety + /// + /// * `device` must be one with `DeviceProperties` that were provided to create this `GpuAllocator` instance. + /// * Same `device` instance must be used for all interactions with one `GpuAllocator` instance + /// and memory blocks allocated from it. 
+ #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))] + pub unsafe fn alloc_with_dedicated( + &mut self, + device: &impl MemoryDevice<M>, + request: Request, + dedicated: Dedicated, + ) -> Result<MemoryBlock<M>, AllocationError> + where + M: Clone, + { + self.alloc_internal(device, request, Some(dedicated)) + } + + unsafe fn alloc_internal( + &mut self, + device: &impl MemoryDevice<M>, + mut request: Request, + dedicated: Option<Dedicated>, + ) -> Result<MemoryBlock<M>, AllocationError> + where + M: Clone, + { + enum Strategy { + Linear, + Buddy, + Dedicated, + } + + request.usage = with_implicit_usage_flags(request.usage); + + if request.usage.contains(UsageFlags::DEVICE_ADDRESS) { + assert!(self.buffer_device_address, "`DEVICE_ADDRESS` cannot be requested when `DeviceProperties::buffer_device_address` is false"); + } + + if request.size > self.max_memory_allocation_size { + return Err(AllocationError::OutOfDeviceMemory); + } + + if let Some(Dedicated::Required) = dedicated { + if self.allocations_remains == 0 { + return Err(AllocationError::TooManyObjects); + } + } + + if 0 == self.memory_for_usage.mask(request.usage) & request.memory_types { + #[cfg(feature = "tracing")] + tracing::error!( + "Cannot serve request {:?}, no memory among bitset `{}` support usage {:?}", + request, + request.memory_types, + request.usage + ); + + return Err(AllocationError::NoCompatibleMemoryTypes); + } + + let transient = request.usage.contains(UsageFlags::TRANSIENT); + + for &index in self.memory_for_usage.types(request.usage) { + if 0 == request.memory_types & (1 << index) { + // Skip memory type incompatible with the request. + continue; + } + + let memory_type = &self.memory_types[index as usize]; + let heap = memory_type.heap; + let heap = &mut self.memory_heaps[heap as usize]; + + let atom_mask = if host_visible_non_coherent(memory_type.props) { + self.non_coherent_atom_mask + } else { + 0 + }; + + let flags = if self.buffer_device_address { + AllocationFlags::DEVICE_ADDRESS + } else { + AllocationFlags::empty() + }; + + let linear_chunk = align_down(self.linear_chunk.min(heap.size() / 32), atom_mask); + + let strategy = match (dedicated, transient) { + (Some(Dedicated::Required), _) => Strategy::Dedicated, + (Some(Dedicated::Preferred), _) + if request.size >= self.preferred_dedicated_treshold => + { + Strategy::Dedicated + } + (_, true) => { + let treshold = self.transient_dedicated_treshold.min(linear_chunk); + + if request.size < treshold { + Strategy::Linear + } else { + Strategy::Dedicated + } + } + (_, false) => { + let treshold = self.dedicated_treshold.min(heap.size() / 32); + + if request.size < treshold { + Strategy::Buddy + } else { + Strategy::Dedicated + } + } + }; + + match strategy { + Strategy::Dedicated => { + if heap.budget() < request.size { + continue; + } + + #[cfg(feature = "tracing")] + tracing::debug!( + "Allocating memory object `{}@{:?}`", + request.size, + memory_type + ); + + match device.allocate_memory(request.size, index, flags) { + Ok(memory) => { + self.allocations_remains -= 1; + heap.alloc(request.size); + + return Ok(MemoryBlock::new( + memory, + index, + memory_type.props, + 0, + request.size, + atom_mask, + MemoryBlockFlavor::Dedicated, + )); + } + Err(OutOfMemory::OutOfDeviceMemory) => continue, + Err(OutOfMemory::OutOfHostMemory) => { + return Err(AllocationError::OutOfHostMemory) + } + } + } + Strategy::Linear => { + let allocator = match &mut self.linear_allocators[index as usize] { + Some(allocator) => allocator, + slot => { + let 
memory_type = &self.memory_types[index as usize]; + slot.get_or_insert(LinearAllocator::new( + linear_chunk, + index, + memory_type.props, + if host_visible_non_coherent(memory_type.props) { + self.non_coherent_atom_mask + } else { + 0 + }, + )) + } + }; + let result = allocator.alloc( + device, + request.size, + request.align_mask, + flags, + heap, + &mut self.allocations_remains, + ); + + match result { + Ok(block) => { + return Ok(MemoryBlock::new( + block.memory, + index, + memory_type.props, + block.offset, + block.size, + atom_mask, + MemoryBlockFlavor::Linear { + chunk: block.chunk, + ptr: block.ptr, + }, + )) + } + Err(AllocationError::OutOfDeviceMemory) => continue, + Err(err) => return Err(err), + } + } + Strategy::Buddy => { + let allocator = match &mut self.buddy_allocators[index as usize] { + Some(allocator) => allocator, + slot => { + let minimal_buddy_size = self + .minimal_buddy_size + .min(heap.size() / 1024) + .next_power_of_two(); + + let initial_buddy_dedicated_size = self + .initial_buddy_dedicated_size + .min(heap.size() / 32) + .next_power_of_two(); + + let memory_type = &self.memory_types[index as usize]; + slot.get_or_insert(BuddyAllocator::new( + minimal_buddy_size, + initial_buddy_dedicated_size, + index, + memory_type.props, + if host_visible_non_coherent(memory_type.props) { + self.non_coherent_atom_mask + } else { + 0 + }, + )) + } + }; + let result = allocator.alloc( + device, + request.size, + request.align_mask, + flags, + heap, + &mut self.allocations_remains, + ); + + match result { + Ok(block) => { + return Ok(MemoryBlock::new( + block.memory, + index, + memory_type.props, + block.offset, + block.size, + atom_mask, + MemoryBlockFlavor::Buddy { + chunk: block.chunk, + ptr: block.ptr, + index: block.index, + }, + )) + } + Err(AllocationError::OutOfDeviceMemory) => continue, + Err(err) => return Err(err), + } + } + } + } + + Err(AllocationError::OutOfDeviceMemory) + } + + /// Deallocates memory block previously allocated from this `GpuAllocator` instance. 
+ /// + /// # Safety + /// + /// * Memory block must have been allocated by this `GpuAllocator` instance + /// * `device` must be one with `DeviceProperties` that were provided to create this `GpuAllocator` instance + /// * Same `device` instance must be used for all interactions with one `GpuAllocator` instance + /// and memory blocks allocated from it + #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))] + pub unsafe fn dealloc(&mut self, device: &impl MemoryDevice<M>, block: MemoryBlock<M>) { + let memory_type = block.memory_type(); + let offset = block.offset(); + let size = block.size(); + let (memory, flavor) = block.deallocate(); + match flavor { + MemoryBlockFlavor::Dedicated => { + let heap = self.memory_types[memory_type as usize].heap; + device.deallocate_memory(memory); + self.allocations_remains += 1; + self.memory_heaps[heap as usize].dealloc(size); + } + MemoryBlockFlavor::Linear { chunk, ptr } => { + let heap = self.memory_types[memory_type as usize].heap; + let heap = &mut self.memory_heaps[heap as usize]; + + let allocator = self.linear_allocators[memory_type as usize] + .as_mut() + .expect("Allocator should exist"); + + allocator.dealloc( + device, + LinearBlock { + offset, + size, + memory, + ptr, + chunk, + }, + heap, + &mut self.allocations_remains, + ); + } + MemoryBlockFlavor::Buddy { chunk, ptr, index } => { + let memory_type = memory_type; + let heap = self.memory_types[memory_type as usize].heap; + let heap = &mut self.memory_heaps[heap as usize]; + + let allocator = self.buddy_allocators[memory_type as usize] + .as_mut() + .expect("Allocator should exist"); + + allocator.dealloc( + device, + BuddyBlock { + offset, + size, + memory, + index, + ptr, + chunk, + }, + heap, + &mut self.allocations_remains, + ); + } + } + } + + /// Deallocates leftover memory objects. + /// Should be used before dropping. 
+ /// + /// # Safety + /// + /// * `device` must be one with `DeviceProperties` that were provided to create this `GpuAllocator` instance + /// * Same `device` instance must be used for all interactions with one `GpuAllocator` instance + /// and memory blocks allocated from it + #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))] + pub unsafe fn cleanup(&mut self, device: &impl MemoryDevice<M>) { + for allocator in self.linear_allocators.iter_mut().filter_map(Option::as_mut) { + allocator.cleanup(device); + } + } +} + +fn host_visible_non_coherent(props: MemoryPropertyFlags) -> bool { + (props & (MemoryPropertyFlags::HOST_COHERENT | MemoryPropertyFlags::HOST_VISIBLE)) + == MemoryPropertyFlags::HOST_VISIBLE +} + +fn with_implicit_usage_flags(usage: UsageFlags) -> UsageFlags { + if usage.is_empty() { + UsageFlags::FAST_DEVICE_ACCESS + } else if usage.intersects(UsageFlags::DOWNLOAD | UsageFlags::UPLOAD) { + usage | UsageFlags::HOST_ACCESS + } else { + usage + } +} diff --git a/third_party/rust/gpu-alloc/src/block.rs b/third_party/rust/gpu-alloc/src/block.rs new file mode 100644 index 0000000000..b1093fd83c --- /dev/null +++ b/third_party/rust/gpu-alloc/src/block.rs @@ -0,0 +1,351 @@ +use { + crate::{align_down, align_up, error::MapError}, + core::{ + convert::TryFrom as _, + ptr::{copy_nonoverlapping, NonNull}, + sync::atomic::{AtomicU8, Ordering::*}, + }, + gpu_alloc_types::{MappedMemoryRange, MemoryDevice, MemoryPropertyFlags}, +}; + +#[derive(Debug)] +struct Relevant; + +impl Drop for Relevant { + #[cfg(feature = "tracing")] + fn drop(&mut self) { + tracing::error!("Memory block wasn't deallocated"); + } + + #[cfg(all(not(feature = "tracing"), feature = "std"))] + fn drop(&mut self) { + eprintln!("Memory block wasn't deallocated") + } + + #[cfg(all(not(feature = "tracing"), not(feature = "std")))] + fn drop(&mut self) { + panic!("Memory block wasn't deallocated") + } +} + +const MAPPING_STATE_UNMAPPED: u8 = 0; +const MAPPING_STATE_MAPPED: u8 = 1; +const MAPPING_STATE_UNMAPPING: u8 = 2; + +/// Memory block allocated by `GpuAllocator`. +#[derive(Debug)] +pub struct MemoryBlock<M> { + memory: M, + memory_type: u32, + props: MemoryPropertyFlags, + offset: u64, + size: u64, + atom_mask: u64, + mapped: AtomicU8, + flavor: MemoryBlockFlavor, + relevant: Relevant, +} + +impl<M> MemoryBlock<M> { + pub(crate) fn new( + memory: M, + memory_type: u32, + props: MemoryPropertyFlags, + offset: u64, + size: u64, + atom_mask: u64, + flavor: MemoryBlockFlavor, + ) -> Self { + MemoryBlock { + memory, + memory_type, + props, + offset, + size, + atom_mask, + flavor, + mapped: AtomicU8::new(MAPPING_STATE_UNMAPPED), + relevant: Relevant, + } + } + + pub(crate) fn deallocate(self) -> (M, MemoryBlockFlavor) { + core::mem::forget(self.relevant); + (self.memory, self.flavor) + } +} + +unsafe impl<M> Sync for MemoryBlock<M> where M: Sync {} +unsafe impl<M> Send for MemoryBlock<M> where M: Send {} + +#[derive(Debug)] +pub(crate) enum MemoryBlockFlavor { + Dedicated, + Linear { + chunk: u64, + ptr: Option<NonNull<u8>>, + }, + Buddy { + chunk: usize, + index: usize, + ptr: Option<NonNull<u8>>, + }, +} + +impl<M> MemoryBlock<M> { + /// Returns reference to parent memory object. + #[inline(always)] + pub fn memory(&self) -> &M { + &self.memory + } + + /// Returns offset in bytes from start of memory object to start of this block. + #[inline(always)] + pub fn offset(&self) -> u64 { + self.offset + } + + /// Returns size of this memory block. 
+ #[inline(always)] + pub fn size(&self) -> u64 { + self.size + } + + /// Returns memory property flags for parent memory object. + #[inline(always)] + pub fn props(&self) -> MemoryPropertyFlags { + self.props + } + + /// Returns index of type of parent memory object. + #[inline(always)] + pub fn memory_type(&self) -> u32 { + self.memory_type + } + + /// Returns pointer to mapped memory range of this block. + /// This blocks becomes mapped. + /// + /// The user of returned pointer must guarantee that any previously submitted command that writes to this range has completed + /// before the host reads from or writes to that range, + /// and that any previously submitted command that reads from that range has completed + /// before the host writes to that region. + /// If the device memory was allocated without the `HOST_COHERENT` property flag set, + /// these guarantees must be made for an extended range: + /// the user must round down the start of the range to the nearest multiple of `non_coherent_atom_size`, + /// and round the end of the range up to the nearest multiple of `non_coherent_atom_size`. + /// + /// # Panics + /// + /// This function panics if block is currently mapped. + /// + /// # Safety + /// + /// `block` must have been allocated from specified `device`. + #[inline(always)] + pub unsafe fn map( + &self, + device: &impl MemoryDevice<M>, + offset: u64, + size: usize, + ) -> Result<NonNull<u8>, MapError> { + let size_u64 = u64::try_from(size).expect("`size` doesn't fit device address space"); + let size = align_up(size_u64, self.atom_mask) + .expect("aligned `size` doesn't fit device address space"); + + let aligned_offset = align_down(offset, self.atom_mask); + + assert!(offset < self.size, "`offset` is out of memory block bounds"); + assert!( + size_u64 <= self.size - offset, + "`offset + size` is out of memory block bounds" + ); + + let ptr = match self.flavor { + MemoryBlockFlavor::Dedicated => { + let offset_align_shift = offset - aligned_offset; + let offset_align_shift = isize::try_from(offset_align_shift) + .expect("`non_coherent_atom_size` is too large"); + + if !self.acquire_mapping() { + return Err(MapError::AlreadyMapped); + } + let aligned_size = offset + size - aligned_offset; + let result = + device.map_memory(&self.memory, self.offset + aligned_offset, aligned_size); + + match result { + Ok(ptr) => ptr.as_ptr().offset(offset_align_shift), + Err(err) => { + self.mapping_failed(); + return Err(err.into()); + } + } + } + MemoryBlockFlavor::Linear { ptr: Some(ptr), .. } + | MemoryBlockFlavor::Buddy { ptr: Some(ptr), .. } => { + if !self.acquire_mapping() { + return Err(MapError::AlreadyMapped); + } + + let offset_isize = isize::try_from(offset) + .expect("Buddy and linear block should fit host address space"); + ptr.as_ptr().offset(offset_isize) + } + _ => return Err(MapError::NonHostVisible), + }; + + Ok(NonNull::new_unchecked(ptr)) + } + + /// Unmaps memory range of this block that was previously mapped with `Block::map`. + /// This block becomes unmapped. + /// + /// # Panics + /// + /// This function panics if this block is not currently mapped. + /// + /// # Safety + /// + /// `block` must have been allocated from specified `device`. + #[inline(always)] + pub unsafe fn unmap(&self, device: &impl MemoryDevice<M>) -> bool { + if !self.start_unmapping() { + return false; + } + match self.flavor { + MemoryBlockFlavor::Dedicated => { + device.unmap_memory(&self.memory); + } + MemoryBlockFlavor::Linear { .. } => {} + MemoryBlockFlavor::Buddy { .. 
} => {} + } + self.end_unmapping(); + true + } + + /// Transiently maps block memory range and copies specified data + /// to the mapped memory range. + /// + /// # Panics + /// + /// This function panics if block is currently mapped. + /// + /// # Safety + /// + /// `block` must have been allocated from specified `device`. + /// The caller must guarantee that any previously submitted command that reads or writes to this range has completed. + #[inline(always)] + pub unsafe fn write_bytes( + &self, + device: &impl MemoryDevice<M>, + offset: u64, + data: &[u8], + ) -> Result<(), MapError> { + let size = data.len(); + let ptr = self.map(device, offset, size)?; + + copy_nonoverlapping(data.as_ptr(), ptr.as_ptr(), size); + let result = if !self.coherent() { + let aligned_offset = align_down(offset, self.atom_mask); + let size = align_up(data.len() as u64, self.atom_mask).unwrap(); + + device.flush_memory_ranges(&[MappedMemoryRange { + memory: &self.memory, + offset: aligned_offset, + size, + }]) + } else { + Ok(()) + }; + + self.unmap(device); + result.map_err(Into::into) + } + + /// Transiently maps block memory range and copies specified data + /// from the mapped memory range. + /// + /// # Panics + /// + /// This function panics if block is currently mapped. + /// + /// # Safety + /// + /// `block` must have been allocated from specified `device`. + /// The caller must guarantee that any previously submitted command that reads to this range has completed. + #[inline(always)] + pub unsafe fn read_bytes( + &self, + device: &impl MemoryDevice<M>, + offset: u64, + data: &mut [u8], + ) -> Result<(), MapError> { + #[cfg(feature = "tracing")] + { + if !self.cached() { + tracing::warn!("Reading from non-cached memory may be slow. Consider allocating HOST_CACHED memory block for host reads.") + } + } + + let size = data.len(); + let ptr = self.map(device, offset, size)?; + let result = if !self.coherent() { + let aligned_offset = align_down(offset, self.atom_mask); + let size = align_up(data.len() as u64, self.atom_mask).unwrap(); + + device.invalidate_memory_ranges(&[MappedMemoryRange { + memory: &self.memory, + offset: aligned_offset, + size, + }]) + } else { + Ok(()) + }; + if result.is_ok() { + copy_nonoverlapping(ptr.as_ptr(), data.as_mut_ptr(), size); + } + + self.unmap(device); + result.map_err(Into::into) + } + + fn acquire_mapping(&self) -> bool { + self.mapped + .compare_exchange( + MAPPING_STATE_UNMAPPED, + MAPPING_STATE_MAPPED, + Acquire, + Relaxed, + ) + .is_ok() + } + + fn mapping_failed(&self) { + self.mapped.store(MAPPING_STATE_UNMAPPED, Relaxed); + } + + fn start_unmapping(&self) -> bool { + self.mapped + .compare_exchange( + MAPPING_STATE_MAPPED, + MAPPING_STATE_UNMAPPING, + Release, + Relaxed, + ) + .is_ok() + } + + fn end_unmapping(&self) { + self.mapped.store(MAPPING_STATE_UNMAPPED, Relaxed); + } + + fn coherent(&self) -> bool { + self.props.contains(MemoryPropertyFlags::HOST_COHERENT) + } + + #[cfg(feature = "tracing")] + fn cached(&self) -> bool { + self.props.contains(MemoryPropertyFlags::HOST_CACHED) + } +} diff --git a/third_party/rust/gpu-alloc/src/buddy.rs b/third_party/rust/gpu-alloc/src/buddy.rs new file mode 100644 index 0000000000..5662109e01 --- /dev/null +++ b/third_party/rust/gpu-alloc/src/buddy.rs @@ -0,0 +1,467 @@ +use { + crate::{ + align_up, error::AllocationError, heap::Heap, slab::Slab, unreachable_unchecked, + MemoryBounds, + }, + alloc::vec::Vec, + core::{convert::TryFrom as _, mem::replace, ptr::NonNull}, + gpu_alloc_types::{AllocationFlags, 
DeviceMapError, MemoryDevice, MemoryPropertyFlags}, +}; + +#[derive(Debug)] +pub(crate) struct BuddyBlock<M> { + pub memory: M, + pub ptr: Option<NonNull<u8>>, + pub size: u64, + pub chunk: usize, + pub offset: u64, + pub index: usize, +} + +unsafe impl<M> Sync for BuddyBlock<M> where M: Sync {} +unsafe impl<M> Send for BuddyBlock<M> where M: Send {} + +#[derive(Clone, Copy, Debug)] +enum PairState { + Exhausted, + Ready { + ready: Side, + next: usize, + prev: usize, + }, +} + +impl PairState { + unsafe fn replace_next(&mut self, value: usize) -> usize { + match self { + PairState::Exhausted => unreachable_unchecked(), + PairState::Ready { next, .. } => replace(next, value), + } + } + + unsafe fn replace_prev(&mut self, value: usize) -> usize { + match self { + PairState::Exhausted => unreachable_unchecked(), + PairState::Ready { prev, .. } => replace(prev, value), + } + } +} + +#[derive(Clone, Copy, Debug)] +enum Side { + Left, + Right, +} +use Side::*; + +#[derive(Debug)] +struct PairEntry { + state: PairState, + chunk: usize, + offset: u64, + parent: Option<usize>, +} + +struct SizeBlockEntry { + chunk: usize, + offset: u64, + index: usize, +} + +#[derive(Debug)] +struct Size { + next_ready: usize, + pairs: Slab<PairEntry>, +} +#[derive(Debug)] +enum Release { + None, + Parent(usize), + Chunk(usize), +} + +impl Size { + fn new() -> Self { + Size { + pairs: Slab::new(), + next_ready: 0, + } + } + + unsafe fn add_pair_and_acquire_left( + &mut self, + chunk: usize, + offset: u64, + parent: Option<usize>, + ) -> SizeBlockEntry { + if self.next_ready < self.pairs.len() { + unreachable_unchecked() + } + + let index = self.pairs.insert(PairEntry { + state: PairState::Exhausted, + chunk, + offset, + parent, + }); + + let entry = self.pairs.get_unchecked_mut(index); + entry.state = PairState::Ready { + next: index, + prev: index, + ready: Right, // Left is allocated. 
+ }; + self.next_ready = index; + + SizeBlockEntry { + chunk, + offset, + index: index << 1, + } + } + + fn acquire(&mut self, size: u64) -> Option<SizeBlockEntry> { + if self.next_ready >= self.pairs.len() { + return None; + } + + let next_ready = self.next_ready; + + let entry = unsafe { self.pairs.get_unchecked_mut(next_ready) }; + let chunk = entry.chunk; + let offset = entry.offset; + + let bit = match entry.state { + PairState::Exhausted => unsafe { unreachable_unchecked() }, + PairState::Ready { ready, next, prev } => { + entry.state = PairState::Exhausted; + + if prev == self.next_ready { + debug_assert_eq!(next, self.next_ready); + self.next_ready = self.pairs.len(); + } else { + let prev_entry = unsafe { self.pairs.get_unchecked_mut(prev) }; + let prev_next = unsafe { prev_entry.state.replace_next(next) }; + debug_assert_eq!(prev_next, self.next_ready); + + let next_entry = unsafe { self.pairs.get_unchecked_mut(next) }; + let next_prev = unsafe { next_entry.state.replace_prev(prev) }; + debug_assert_eq!(next_prev, self.next_ready); + + self.next_ready = next; + } + + match ready { + Left => 0, + Right => 1, + } + } + }; + + Some(SizeBlockEntry { + chunk, + offset: offset + bit as u64 * size, + index: (next_ready << 1) | bit as usize, + }) + } + + fn release(&mut self, index: usize) -> Release { + let side = match index & 1 { + 0 => Side::Left, + 1 => Side::Right, + _ => unsafe { unreachable_unchecked() }, + }; + let index = index >> 1; + + let len = self.pairs.len(); + + let entry = self.pairs.get_mut(index); + + let chunk = entry.chunk; + let offset = entry.offset; + let parent = entry.parent; + + match (entry.state, side) { + (PairState::Exhausted, side) => { + if self.next_ready == len { + entry.state = PairState::Ready { + ready: side, + next: index, + prev: index, + }; + self.next_ready = index; + } else { + debug_assert!(self.next_ready < len); + + let next = self.next_ready; + let next_entry = unsafe { self.pairs.get_unchecked_mut(next) }; + let prev = unsafe { next_entry.state.replace_prev(index) }; + + let prev_entry = unsafe { self.pairs.get_unchecked_mut(prev) }; + let prev_next = unsafe { prev_entry.state.replace_next(index) }; + debug_assert_eq!(prev_next, next); + + let entry = unsafe { self.pairs.get_unchecked_mut(index) }; + entry.state = PairState::Ready { + ready: side, + next, + prev, + }; + } + Release::None + } + (PairState::Ready { ready: Left, .. }, Left) + | (PairState::Ready { ready: Right, .. 
}, Right) => { + panic!("Attempt to dealloate already free block") + } + + ( + PairState::Ready { + ready: Left, + next, + prev, + }, + Side::Right, + ) + | ( + PairState::Ready { + ready: Right, + next, + prev, + }, + Side::Left, + ) => { + entry.state = PairState::Exhausted; + + if prev == index { + debug_assert_eq!(next, index); + self.next_ready = self.pairs.len(); + } else { + let prev_entry = unsafe { self.pairs.get_unchecked_mut(prev) }; + let prev_next = unsafe { prev_entry.state.replace_next(next) }; + debug_assert_eq!(prev_next, index); + + let next_entry = unsafe { self.pairs.get_unchecked_mut(next) }; + let next_prev = unsafe { next_entry.state.replace_prev(prev) }; + debug_assert_eq!(next_prev, index); + + self.next_ready = next; + } + + match parent { + Some(parent) => Release::Parent(parent), + None => { + debug_assert_eq!(offset, 0); + Release::Chunk(chunk) + } + } + } + } + } +} + +#[derive(Debug)] +struct Chunk<M> { + memory: M, + ptr: Option<NonNull<u8>>, + size: u64, +} + +#[derive(Debug)] +pub(crate) struct BuddyAllocator<M> { + minimal_size: u64, + chunks: Slab<Chunk<M>>, + sizes: Vec<Size>, + memory_type: u32, + props: MemoryPropertyFlags, + atom_mask: u64, +} + +unsafe impl<M> Sync for BuddyAllocator<M> where M: Sync {} +unsafe impl<M> Send for BuddyAllocator<M> where M: Send {} + +impl<M> BuddyAllocator<M> +where + M: MemoryBounds + 'static, +{ + pub fn new( + minimal_size: u64, + initial_dedicated_size: u64, + memory_type: u32, + props: MemoryPropertyFlags, + atom_mask: u64, + ) -> Self { + assert!( + minimal_size.is_power_of_two(), + "Minimal allocation size of buddy allocator must be power of two" + ); + assert!( + initial_dedicated_size.is_power_of_two(), + "Dedicated allocation size of buddy allocator must be power of two" + ); + + let initial_sizes = (initial_dedicated_size + .trailing_zeros() + .saturating_sub(minimal_size.trailing_zeros())) as usize; + + BuddyAllocator { + minimal_size, + chunks: Slab::new(), + sizes: (0..initial_sizes).map(|_| Size::new()).collect(), + memory_type, + props, + atom_mask: atom_mask | (minimal_size - 1), + } + } + + #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))] + pub unsafe fn alloc( + &mut self, + device: &impl MemoryDevice<M>, + size: u64, + align_mask: u64, + flags: AllocationFlags, + heap: &mut Heap, + allocations_remains: &mut u32, + ) -> Result<BuddyBlock<M>, AllocationError> + where + M: Clone, + { + let align_mask = align_mask | self.atom_mask; + + let size = align_up(size, align_mask) + .and_then(|size| size.checked_next_power_of_two()) + .ok_or(AllocationError::OutOfDeviceMemory)?; + + let size_index = size.trailing_zeros() - self.minimal_size.trailing_zeros(); + let size_index = + usize::try_from(size_index).map_err(|_| AllocationError::OutOfDeviceMemory)?; + + while self.sizes.len() <= size_index { + self.sizes.push(Size::new()); + } + + let host_visible = self.host_visible(); + + let mut candidate_size_index = size_index; + + let (mut entry, entry_size_index) = loop { + let sizes_len = self.sizes.len(); + + let candidate_size_entry = &mut self.sizes[candidate_size_index]; + let candidate_size = self.minimal_size << candidate_size_index; + + if let Some(entry) = candidate_size_entry.acquire(candidate_size) { + break (entry, candidate_size_index); + } + + if sizes_len == candidate_size_index + 1 { + // That's size of device allocation. 
+ if *allocations_remains == 0 { + return Err(AllocationError::TooManyObjects); + } + + let chunk_size = self.minimal_size << (candidate_size_index + 1); + let memory = device.allocate_memory(chunk_size, self.memory_type, flags)?; + *allocations_remains -= 1; + heap.alloc(chunk_size); + + let ptr = if host_visible { + match device.map_memory(&memory, 0, chunk_size) { + Ok(ptr) => Some(ptr), + Err(DeviceMapError::OutOfDeviceMemory) => { + return Err(AllocationError::OutOfDeviceMemory) + } + Err(DeviceMapError::MapFailed) | Err(DeviceMapError::OutOfHostMemory) => { + return Err(AllocationError::OutOfHostMemory) + } + } + } else { + None + }; + + let chunk = self.chunks.insert(Chunk { + memory, + ptr, + size: chunk_size, + }); + + let entry = candidate_size_entry.add_pair_and_acquire_left(chunk, 0, None); + + break (entry, candidate_size_index); + } + + candidate_size_index += 1; + }; + + for size_index in (size_index..entry_size_index).rev() { + let size_entry = &mut self.sizes[size_index]; + entry = + size_entry.add_pair_and_acquire_left(entry.chunk, entry.offset, Some(entry.index)); + } + + let chunk_entry = self.chunks.get_unchecked(entry.chunk); + + debug_assert!( + entry + .offset + .checked_add(size) + .map_or(false, |end| end <= chunk_entry.size), + "Offset + size is not in chunk bounds" + ); + + Ok(BuddyBlock { + memory: chunk_entry.memory.clone(), + ptr: chunk_entry + .ptr + .map(|ptr| NonNull::new_unchecked(ptr.as_ptr().add(entry.offset as usize))), + offset: entry.offset, + size, + chunk: entry.chunk, + index: entry.index, + }) + } + + #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))] + pub unsafe fn dealloc( + &mut self, + device: &impl MemoryDevice<M>, + block: BuddyBlock<M>, + heap: &mut Heap, + allocations_remains: &mut u32, + ) { + debug_assert!(block.size.is_power_of_two()); + + let size_index = + (block.size.trailing_zeros() - self.minimal_size.trailing_zeros()) as usize; + + let mut release_index = block.index; + let mut release_size_index = size_index; + + loop { + match self.sizes[release_size_index].release(release_index) { + Release::Parent(parent) => { + release_size_index += 1; + release_index = parent; + } + Release::Chunk(chunk) => { + debug_assert_eq!(chunk, block.chunk); + let chunk = self.chunks.remove(chunk); + debug_assert_eq!(chunk.size, self.minimal_size << (release_size_index + 1)); + + drop(block); + device.deallocate_memory(chunk.memory); + *allocations_remains += 1; + heap.dealloc(chunk.size); + + return; + } + Release::None => return, + } + } + } + + fn host_visible(&self) -> bool { + self.props.contains(MemoryPropertyFlags::HOST_VISIBLE) + } +} diff --git a/third_party/rust/gpu-alloc/src/config.rs b/third_party/rust/gpu-alloc/src/config.rs new file mode 100644 index 0000000000..84442a9fef --- /dev/null +++ b/third_party/rust/gpu-alloc/src/config.rs @@ -0,0 +1,72 @@ +/// Configuration for [`GpuAllocator`] +/// +/// [`GpuAllocator`]: type.GpuAllocator +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct Config { + /// Size in bytes of request that will be served by dedicated memory object. + /// This value should be large enough to not exhaust memory object limit + /// and not use slow memory object allocation when it is not necessary. + pub dedicated_treshold: u64, + + /// Size in bytes of request that will be served by dedicated memory object if preferred. 
+ /// This value should be large enough to not exhaust memory object limit + /// and not use slow memory object allocation when it is not necessary. + /// + /// This won't make much sense if this value is larger than `dedicated_treshold`. + pub preferred_dedicated_treshold: u64, + + /// Size in bytes of transient memory request that will be served by dedicated memory object. + /// This value should be large enough to not exhaust memory object limit + /// and not use slow memory object allocation when it is not necessary. + /// + /// This won't make much sense if this value is lesser than `dedicated_treshold`. + pub transient_dedicated_treshold: u64, + + /// Size in bytes for chunks for linear allocator. + pub linear_chunk: u64, + + /// Minimal size for buddy allocator. + pub minimal_buddy_size: u64, + + /// Initial memory object size for buddy allocator. + /// If less than `minimal_buddy_size` then `minimal_buddy_size` is used instead. + pub initial_buddy_dedicated_size: u64, +} + +impl Config { + /// Returns default configuration. + /// + /// This is not `Default` implementation to discourage usage outside of + /// prototyping. + /// + /// Proper configuration should depend on hardware and intended usage.\ + /// But those values can be used as starting point.\ + /// Note that they can simply not work for some platforms with lesser + /// memory capacity than today's "modern" GPU (year 2020). + pub fn i_am_prototyping() -> Self { + // Assume that today's modern GPU is made of 1024 potatoes. + let potato = Config::i_am_potato(); + + Config { + dedicated_treshold: potato.dedicated_treshold * 1024, + preferred_dedicated_treshold: potato.preferred_dedicated_treshold * 1024, + transient_dedicated_treshold: potato.transient_dedicated_treshold * 1024, + linear_chunk: potato.linear_chunk * 1024, + minimal_buddy_size: potato.minimal_buddy_size * 1024, + initial_buddy_dedicated_size: potato.initial_buddy_dedicated_size * 1024, + } + } + + /// Returns default configuration for average sized potato. + pub fn i_am_potato() -> Self { + Config { + dedicated_treshold: 32 * 1024, + preferred_dedicated_treshold: 1024, + transient_dedicated_treshold: 128 * 1024, + linear_chunk: 128 * 1024, + minimal_buddy_size: 1, + initial_buddy_dedicated_size: 8 * 1024, + } + } +} diff --git a/third_party/rust/gpu-alloc/src/error.rs b/third_party/rust/gpu-alloc/src/error.rs new file mode 100644 index 0000000000..2a516dfa59 --- /dev/null +++ b/third_party/rust/gpu-alloc/src/error.rs @@ -0,0 +1,116 @@ +use { + core::fmt::{self, Display}, + gpu_alloc_types::{DeviceMapError, OutOfMemory}, +}; + +/// Enumeration of possible errors that may occur during memory allocation. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum AllocationError { + /// Backend reported that device memory has been exhausted.\ + /// Deallocating device memory from the same heap may increase chance + /// that another allocation would succeed. + OutOfDeviceMemory, + + /// Backend reported that host memory has been exhausted.\ + /// Deallocating host memory may increase chance that another allocation would succeed. + OutOfHostMemory, + + /// Allocation request cannot be fullfilled as no available memory types allowed + /// by `Request.memory_types` mask is compatible with `request.usage`. + NoCompatibleMemoryTypes, + + /// Reached limit on allocated memory objects count.\ + /// Deallocating device memory may increase chance that another allocation would succeed. + /// Especially dedicated memory blocks. 
+ /// + /// If this error is returned when memory heaps are far from exhausted + /// `Config` should be tweaked to allocate larger memory objects. + TooManyObjects, +} + +impl From<OutOfMemory> for AllocationError { + fn from(err: OutOfMemory) -> Self { + match err { + OutOfMemory::OutOfDeviceMemory => AllocationError::OutOfDeviceMemory, + OutOfMemory::OutOfHostMemory => AllocationError::OutOfHostMemory, + } + } +} + +impl Display for AllocationError { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + AllocationError::OutOfDeviceMemory => fmt.write_str("Device memory exhausted"), + AllocationError::OutOfHostMemory => fmt.write_str("Host memory exhausted"), + AllocationError::NoCompatibleMemoryTypes => fmt.write_str( + "No compatible memory types from requested types support requested usage", + ), + AllocationError::TooManyObjects => { + fmt.write_str("Reached limit on allocated memory objects count") + } + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for AllocationError {} + +/// Enumeration of possible errors that may occur during memory mapping. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum MapError { + /// Backend reported that device memory has been exhausted.\ + /// Deallocating device memory from the same heap may increase chance + /// that another mapping would succeed. + OutOfDeviceMemory, + + /// Backend reported that host memory has been exhausted.\ + /// Deallocating host memory may increase chance that another mapping would succeed. + OutOfHostMemory, + + /// Attempt to map memory block with non-host-visible memory type.\ + /// Ensure to include `UsageFlags::HOST_ACCESS` into allocation request + /// when memory mapping is intended. + NonHostVisible, + + /// Map failed for implementation specific reason.\ + /// For Vulkan backend this includes failed attempt + /// to allocate large enough virtual address space. + MapFailed, + + /// Mapping failed due to block being already mapped. 
+ AlreadyMapped, +} + +impl From<DeviceMapError> for MapError { + fn from(err: DeviceMapError) -> Self { + match err { + DeviceMapError::OutOfDeviceMemory => MapError::OutOfDeviceMemory, + DeviceMapError::OutOfHostMemory => MapError::OutOfHostMemory, + DeviceMapError::MapFailed => MapError::MapFailed, + } + } +} + +impl From<OutOfMemory> for MapError { + fn from(err: OutOfMemory) -> Self { + match err { + OutOfMemory::OutOfDeviceMemory => MapError::OutOfDeviceMemory, + OutOfMemory::OutOfHostMemory => MapError::OutOfHostMemory, + } + } +} + +impl Display for MapError { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + MapError::OutOfDeviceMemory => fmt.write_str("Device memory exhausted"), + MapError::OutOfHostMemory => fmt.write_str("Host memory exhausted"), + MapError::MapFailed => fmt.write_str("Failed to map memory object"), + MapError::NonHostVisible => fmt.write_str("Impossible to map non-host-visible memory"), + MapError::AlreadyMapped => fmt.write_str("Block is already mapped"), + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for MapError {} diff --git a/third_party/rust/gpu-alloc/src/heap.rs b/third_party/rust/gpu-alloc/src/heap.rs new file mode 100644 index 0000000000..16e14c8ea2 --- /dev/null +++ b/third_party/rust/gpu-alloc/src/heap.rs @@ -0,0 +1,36 @@ +#[derive(Debug)] +pub(crate) struct Heap { + size: u64, + used: u64, + allocated: u128, + deallocated: u128, +} + +impl Heap { + pub(crate) fn new(size: u64) -> Self { + Heap { + size, + used: 0, + allocated: 0, + deallocated: 0, + } + } + + pub(crate) fn size(&mut self) -> u64 { + self.size + } + + pub(crate) fn budget(&mut self) -> u64 { + self.size - self.used + } + + pub(crate) fn alloc(&mut self, size: u64) { + self.used += size; + self.allocated += u128::from(size); + } + + pub(crate) fn dealloc(&mut self, size: u64) { + self.used -= size; + self.deallocated += u128::from(size); + } +} diff --git a/third_party/rust/gpu-alloc/src/lib.rs b/third_party/rust/gpu-alloc/src/lib.rs new file mode 100644 index 0000000000..1b5c09adbd --- /dev/null +++ b/third_party/rust/gpu-alloc/src/lib.rs @@ -0,0 +1,92 @@ +//! +//! Implementation agnostic memory allocator for Vulkan like APIs. +//! +//! This crate is intended to be used as part of safe API implementations.\ +//! Use with caution. There are unsafe functions all over the place. +//! +//! # Usage +//! +//! Start with fetching `DeviceProperties` from `gfx-alloc-<backend>` crate for the backend of choice.\ +//! Then create `GpuAllocator` instance and use it for all device memory allocations.\ +//! `GpuAllocator` will take care for all necessary bookkeeping like memory object count limit, +//! heap budget and memory mapping. +//! +//! ### Backends implementations +//! +//! Backend supporting crates should not depend on this crate.\ +//! Instead they should depend on `gpu-alloc-types` which is much more stable, +//! allowing to upgrade `gpu-alloc` version without `gfx-alloc-<backend>` upgrade. +//! + +#![cfg_attr(not(feature = "std"), no_std)] + +extern crate alloc; + +mod allocator; +mod block; +mod buddy; +mod config; +mod error; +mod heap; +mod linear; +mod slab; +mod usage; + +pub use { + self::{allocator::*, block::MemoryBlock, config::*, error::*, usage::*}, + gpu_alloc_types::*, +}; + +/// Memory request for allocator. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct Request { + /// Minimal size of memory block required. 
+ /// Returned block may have larger size, + /// use `MemoryBlock::size` to learn actual size of returned block. + pub size: u64, + + /// Minimal alignment mask required. + /// Returnd block may have larger alignment, + /// use `MemoryBlock::align` to learn actual alignment of returned block. + pub align_mask: u64, + + /// Intended memory usage. + /// Returned block may support additional usages, + /// use `MemoryBlock::props` to learn memory properties of returned block. + pub usage: UsageFlags, + + /// Bitset for memory types. + /// Returned block will be from memory type corresponding to one of set bits, + /// use `MemoryBlock::memory_type` to learn memory type index of returned block. + pub memory_types: u32, +} + +/// Aligns `value` up to `align_maks` +/// Returns smallest integer not lesser than `value` aligned by `align_mask`. +/// Returns `None` on overflow. +pub(crate) fn align_up(value: u64, align_mask: u64) -> Option<u64> { + Some(value.checked_add(align_mask)? & !align_mask) +} + +/// Align `value` down to `align_maks` +/// Returns largest integer not bigger than `value` aligned by `align_mask`. +pub(crate) fn align_down(value: u64, align_mask: u64) -> u64 { + value & !align_mask +} + +#[cfg(debug_assertions)] +#[allow(unused_unsafe)] +unsafe fn unreachable_unchecked() -> ! { + unreachable!() +} + +#[cfg(not(debug_assertions))] +unsafe fn unreachable_unchecked() -> ! { + core::hint::unreachable_unchecked() +} + +// #[cfg(feature = "tracing")] +use core::fmt::Debug as MemoryBounds; + +// #[cfg(not(feature = "tracing"))] +// use core::any::Any as MemoryBounds; diff --git a/third_party/rust/gpu-alloc/src/linear.rs b/third_party/rust/gpu-alloc/src/linear.rs new file mode 100644 index 0000000000..08b1c1ccbb --- /dev/null +++ b/third_party/rust/gpu-alloc/src/linear.rs @@ -0,0 +1,307 @@ +use { + crate::{align_down, align_up, error::AllocationError, heap::Heap, MemoryBounds}, + alloc::collections::VecDeque, + core::{convert::TryFrom as _, ptr::NonNull}, + gpu_alloc_types::{AllocationFlags, DeviceMapError, MemoryDevice, MemoryPropertyFlags}, +}; + +#[derive(Debug)] +pub(crate) struct LinearBlock<M> { + pub memory: M, + pub ptr: Option<NonNull<u8>>, + pub offset: u64, + pub size: u64, + pub chunk: u64, +} + +unsafe impl<M> Sync for LinearBlock<M> where M: Sync {} +unsafe impl<M> Send for LinearBlock<M> where M: Send {} + +#[derive(Debug)] +struct Chunk<M> { + memory: M, + offset: u64, + allocated: u64, + ptr: Option<NonNull<u8>>, +} + +impl<M> Chunk<M> { + fn exhaust(self) -> ExhaustedChunk<M> { + debug_assert_ne!(self.allocated, 0, "Unused chunk cannot be exhaused"); + + ExhaustedChunk { + memory: self.memory, + allocated: self.allocated, + } + } +} + +#[derive(Debug)] +struct ExhaustedChunk<M> { + memory: M, + allocated: u64, +} + +#[derive(Debug)] +pub(crate) struct LinearAllocator<M> { + ready: Option<Chunk<M>>, + exhausted: VecDeque<Option<ExhaustedChunk<M>>>, + offset: u64, + chunk_size: u64, + memory_type: u32, + props: MemoryPropertyFlags, + atom_mask: u64, +} + +unsafe impl<M> Sync for LinearAllocator<M> where M: Sync {} +unsafe impl<M> Send for LinearAllocator<M> where M: Send {} + +impl<M> LinearAllocator<M> +where + M: MemoryBounds + 'static, +{ + pub fn new( + chunk_size: u64, + memory_type: u32, + props: MemoryPropertyFlags, + atom_mask: u64, + ) -> Self { + debug_assert_eq!(align_down(chunk_size, atom_mask), chunk_size); + + LinearAllocator { + ready: None, + exhausted: VecDeque::new(), + offset: 0, + chunk_size: min(chunk_size, isize::max_value()), + memory_type, + props, 
+ atom_mask, + } + } + + #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))] + pub unsafe fn alloc( + &mut self, + device: &impl MemoryDevice<M>, + size: u64, + align_mask: u64, + flags: AllocationFlags, + heap: &mut Heap, + allocations_remains: &mut u32, + ) -> Result<LinearBlock<M>, AllocationError> + where + M: Clone, + { + debug_assert!( + self.chunk_size >= size, + "GpuAllocator must not request allocations equal or greater to chunks size" + ); + + let size = align_up(size, self.atom_mask) + .expect("Any value not greater than aligned chunk size must fit for alignment"); + + let align_mask = align_mask | self.atom_mask; + let host_visible = self.host_visible(); + + match &mut self.ready { + Some(ready) if fits(self.chunk_size, ready.offset, size, align_mask) => { + let chunks_offset = self.offset; + let exhausted = self.exhausted.len() as u64; + Ok(Self::alloc_from_chunk( + ready, + self.chunk_size, + chunks_offset, + exhausted, + size, + align_mask, + )) + } + + ready => { + self.exhausted + .extend(ready.take().map(Chunk::exhaust).map(Some)); + + if *allocations_remains == 0 { + return Err(AllocationError::TooManyObjects); + } + + let memory = device.allocate_memory(self.chunk_size, self.memory_type, flags)?; + + *allocations_remains -= 1; + heap.alloc(self.chunk_size); + + let ptr = if host_visible { + match device.map_memory(&memory, 0, self.chunk_size) { + Ok(ptr) => Some(ptr), + Err(DeviceMapError::MapFailed) => { + #[cfg(feature = "tracing")] + tracing::error!( + "Failed to map host-visible memory in linear allocator" + ); + device.deallocate_memory(memory); + *allocations_remains += 1; + heap.dealloc(self.chunk_size); + + return Err(AllocationError::OutOfHostMemory); + } + Err(DeviceMapError::OutOfDeviceMemory) => { + return Err(AllocationError::OutOfDeviceMemory); + } + Err(DeviceMapError::OutOfHostMemory) => { + return Err(AllocationError::OutOfHostMemory); + } + } + } else { + None + }; + + let ready = ready.get_or_insert(Chunk { + ptr, + memory, + allocated: 0, + offset: 0, + }); + + let chunks_offset = self.offset; + let exhausted = self.exhausted.len() as u64; + Ok(Self::alloc_from_chunk( + ready, + self.chunk_size, + chunks_offset, + exhausted, + size, + align_mask, + )) + } + } + } + + #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))] + pub unsafe fn dealloc( + &mut self, + device: &impl MemoryDevice<M>, + block: LinearBlock<M>, + heap: &mut Heap, + allocations_remains: &mut u32, + ) { + debug_assert_eq!(block.ptr.is_some(), self.host_visible()); + + debug_assert!(block.chunk >= self.offset, "Chunk index is less than chunk offset in this allocator. Probably incorrect allocator instance"); + let chunk_offset = block.chunk - self.offset; + match usize::try_from(chunk_offset) { + Ok(chunk_offset) => { + if chunk_offset > self.exhausted.len() { + panic!("Chunk index is out of bounds. Probably incorrect allocator instance") + } + + if chunk_offset == self.exhausted.len() { + let chunk = self.ready.as_mut().expect( + "Chunk index is out of bounds. Probably incorrect allocator instance", + ); + chunk.allocated -= 1; + if chunk.allocated == 0 { + // Reuse chunk. + chunk.offset = 0; + } + } else { + let chunk = &mut self.exhausted[chunk_offset].as_mut().expect("Chunk index points to deallocated chunk. 
Probably incorrect allocator instance"); + chunk.allocated -= 1; + + if chunk.allocated == 0 { + let memory = self.exhausted[chunk_offset].take().unwrap().memory; + drop(block); + device.deallocate_memory(memory); + *allocations_remains += 1; + heap.dealloc(self.chunk_size); + + if chunk_offset == 0 { + while let Some(None) = self.exhausted.get(0) { + self.exhausted.pop_front(); + self.offset += 1; + } + } + } + } + } + Err(_) => { + panic!("Chunk index does not fit `usize`. Probably incorrect allocator instance") + } + } + } + + #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))] + pub unsafe fn cleanup(&mut self, device: &impl MemoryDevice<M>) { + if let Some(chunk) = self.ready.take() { + assert_eq!( + chunk.allocated, 0, + "All blocks must be deallocated before cleanup" + ); + device.deallocate_memory(chunk.memory); + } + + assert!( + self.exhausted.is_empty(), + "All blocks must be deallocated before cleanup" + ); + } + + fn host_visible(&self) -> bool { + self.props.contains(MemoryPropertyFlags::HOST_VISIBLE) + } + + unsafe fn alloc_from_chunk( + chunk: &mut Chunk<M>, + chunk_size: u64, + chunks_offset: u64, + exhausted: u64, + size: u64, + align_mask: u64, + ) -> LinearBlock<M> + where + M: Clone, + { + debug_assert!( + fits(chunk_size, chunk.offset, size, align_mask), + "Must be checked in caller" + ); + + let offset = + align_up(chunk.offset, align_mask).expect("Chunk must be checked to fit allocation"); + + chunk.offset = offset + size; + chunk.allocated += 1; + + debug_assert!( + offset + .checked_add(size) + .map_or(false, |end| end <= chunk_size), + "Offset + size is not in chunk bounds" + ); + LinearBlock { + memory: chunk.memory.clone(), + ptr: chunk + .ptr + .map(|ptr| NonNull::new_unchecked(ptr.as_ptr().offset(offset as isize))), + offset, + size, + chunk: chunks_offset + exhausted, + } + } +} + +fn fits(chunk_size: u64, chunk_offset: u64, size: u64, align_mask: u64) -> bool { + align_up(chunk_offset, align_mask) + .and_then(|aligned| aligned.checked_add(size)) + .map_or(false, |end| end <= chunk_size) +} + +fn min<L, R>(l: L, r: R) -> L +where + R: core::convert::TryInto<L>, + L: Ord, +{ + match r.try_into() { + Ok(r) => core::cmp::min(l, r), + Err(_) => l, + } +} diff --git a/third_party/rust/gpu-alloc/src/slab.rs b/third_party/rust/gpu-alloc/src/slab.rs new file mode 100644 index 0000000000..eff164dbb3 --- /dev/null +++ b/third_party/rust/gpu-alloc/src/slab.rs @@ -0,0 +1,97 @@ +use {crate::unreachable_unchecked, alloc::vec::Vec, core::mem::replace}; + +#[derive(Debug)] +enum Entry<T> { + Vacant(usize), + Occupied(T), +} +#[derive(Debug)] +pub(crate) struct Slab<T> { + next_vacant: usize, + entries: Vec<Entry<T>>, +} + +impl<T> Slab<T> { + pub fn new() -> Self { + Slab { + next_vacant: !0, + entries: Vec::new(), + } + } + + /// Inserts value into this linked vec and returns index + /// at which value can be accessed in constant time. 
+ pub fn insert(&mut self, value: T) -> usize { + if self.next_vacant >= self.entries.len() { + self.entries.push(Entry::Occupied(value)); + self.entries.len() - 1 + } else { + match *unsafe { self.entries.get_unchecked(self.next_vacant) } { + Entry::Vacant(next_vacant) => { + unsafe { + *self.entries.get_unchecked_mut(self.next_vacant) = Entry::Occupied(value); + } + replace(&mut self.next_vacant, next_vacant) + } + _ => unsafe { unreachable_unchecked() }, + } + } + } + + pub fn len(&self) -> usize { + self.entries.len() + } + + pub unsafe fn get_unchecked(&self, index: usize) -> &T { + debug_assert!(index < self.len()); + + match self.entries.get_unchecked(index) { + Entry::Occupied(value) => value, + _ => unreachable_unchecked(), + } + } + + pub unsafe fn get_unchecked_mut(&mut self, index: usize) -> &mut T { + debug_assert!(index < self.len()); + + match self.entries.get_unchecked_mut(index) { + Entry::Occupied(value) => value, + _ => unreachable_unchecked(), + } + } + + // pub fn get(&self, index: usize) -> &T { + // match self.entries.get(index) { + // Some(Entry::Occupied(value)) => value, + // _ => panic!("Invalid index"), + // } + // } + + pub fn get_mut(&mut self, index: usize) -> &mut T { + match self.entries.get_mut(index) { + Some(Entry::Occupied(value)) => value, + _ => panic!("Invalid index"), + } + } + + pub unsafe fn remove_unchecked(&mut self, index: usize) -> T { + let entry = replace( + self.entries.get_unchecked_mut(index), + Entry::Vacant(self.next_vacant), + ); + + self.next_vacant = index; + + match entry { + Entry::Occupied(value) => value, + _ => unreachable_unchecked(), + } + } + + pub fn remove(&mut self, index: usize) -> T { + match self.entries.get_mut(index) { + Some(Entry::Occupied(_)) => unsafe { self.remove_unchecked(index) }, + _ => panic!("Invalid index"), + } + } +} diff --git a/third_party/rust/gpu-alloc/src/usage.rs b/third_party/rust/gpu-alloc/src/usage.rs new file mode 100644 index 0000000000..29ed02ec25 --- /dev/null +++ b/third_party/rust/gpu-alloc/src/usage.rs @@ -0,0 +1,153 @@ +use { + core::fmt::{self, Debug}, + gpu_alloc_types::{MemoryPropertyFlags, MemoryType}, +}; + +bitflags::bitflags! { + /// Memory usage type. + /// Bits set define intended usage for requested memory. + #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] + pub struct UsageFlags: u8 { + /// Hints for allocator to find memory with faster device access. + /// If no flags is specified than `FAST_DEVICE_ACCESS` is implied. + const FAST_DEVICE_ACCESS = 0x01; + + /// Memory will be accessed from host. + /// This flags guarantees that host memory operations will be available. + /// Otherwise implementation is encouraged to use non-host-accessible memory. + const HOST_ACCESS = 0x02; + + /// Hints allocator that memory will be used for data downloading. + /// Allocator will strongly prefer host-cached memory. + /// Implies `HOST_ACCESS` flag. + const DOWNLOAD = 0x04; + + /// Hints allocator that memory will be used for data uploading. + /// If `DOWNLOAD` flag is not set then allocator will assume that + /// host will access memory in write-only manner and may + /// pick not host-cached. + /// Implies `HOST_ACCESS` flag. + const UPLOAD = 0x08; + + /// Hints allocator that memory will be used for short duration + /// allowing to use faster algorithm with less memory overhead. + /// If use holds returned memory block for too long then + /// effective memory overhead increases instead. + /// Best use case is for staging buffer for single batch of operations. 
+ const TRANSIENT = 0x10; + + /// Requests memory that can be addressed with `u64`. + /// Allows fetching device address for resources bound to that memory. + const DEVICE_ADDRESS = 0x20; + } +} + +#[derive(Clone, Copy, Debug)] +struct MemoryForOneUsage { + mask: u32, + types: [u32; 32], + types_count: u32, +} + +pub(crate) struct MemoryForUsage { + usages: [MemoryForOneUsage; 64], +} + +impl Debug for MemoryForUsage { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("MemoryForUsage") + .field("usages", &&self.usages[..]) + .finish() + } +} + +impl MemoryForUsage { + pub fn new(memory_types: &[MemoryType]) -> Self { + assert!( + memory_types.len() <= 32, + "Only up to 32 memory types supported" + ); + + let mut mfu = MemoryForUsage { + usages: [MemoryForOneUsage { + mask: 0, + types: [0; 32], + types_count: 0, + }; 64], + }; + + for usage in 0..64 { + mfu.usages[usage as usize] = + one_usage(UsageFlags::from_bits_truncate(usage), memory_types); + } + + mfu + } + + /// Returns mask with bits set for memory type indices that support the + /// usage. + pub fn mask(&self, usage: UsageFlags) -> u32 { + self.usages[usage.bits() as usize].mask + } + + /// Returns slice of memory type indices that support the usage. + /// Earlier memory type has priority over later. + pub fn types(&self, usage: UsageFlags) -> &[u32] { + let usage = &self.usages[usage.bits() as usize]; + &usage.types[..usage.types_count as usize] + } +} + +fn one_usage(usage: UsageFlags, memory_types: &[MemoryType]) -> MemoryForOneUsage { + let mut types = [0; 32]; + let mut types_count = 0; + + for (index, mt) in memory_types.iter().enumerate() { + if compatible(usage, mt.props) { + types[types_count as usize] = index as u32; + types_count += 1; + } + } + + types[..types_count as usize] + .sort_unstable_by_key(|&index| priority(usage, memory_types[index as usize].props)); + + let mask = types[..types_count as usize] + .iter() + .fold(0u32, |mask, index| mask | 1u32 << index); + + MemoryForOneUsage { + types, + mask, + types_count, + } +} + +fn compatible(usage: UsageFlags, flags: MemoryPropertyFlags) -> bool { + type Flags = MemoryPropertyFlags; + if flags.contains(Flags::LAZILY_ALLOCATED) || flags.contains(Flags::PROTECTED) { + false + } else if usage.intersects(UsageFlags::HOST_ACCESS | UsageFlags::UPLOAD | UsageFlags::DOWNLOAD) + { + flags.contains(Flags::HOST_VISIBLE) + } else { + true + } +} + +fn priority(usage: UsageFlags, flags: MemoryPropertyFlags) -> u32 { + type Flags = MemoryPropertyFlags; + + let device_local: bool = flags.contains(Flags::DEVICE_LOCAL) + ^ (usage.is_empty() || usage.contains(UsageFlags::FAST_DEVICE_ACCESS)); + + let host_visible: bool = flags.contains(Flags::HOST_VISIBLE) + && !usage.intersects(UsageFlags::HOST_ACCESS | UsageFlags::UPLOAD | UsageFlags::DOWNLOAD); + + let cached: bool = flags.contains(Flags::HOST_CACHED) ^ usage.contains(UsageFlags::DOWNLOAD); + + let coherent: bool = flags.contains(Flags::HOST_COHERENT) + ^ (usage.intersects(UsageFlags::UPLOAD | UsageFlags::DOWNLOAD)); + + device_local as u32 * 8 + host_visible as u32 * 4 + cached as u32 * 2 + coherent as u32 +} |
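For orientation, the sketch below strings together the API this vendored crate adds — `Config::i_am_prototyping`, `GpuAllocator::new`, `alloc`, `MemoryBlock::write_bytes`, `dealloc`, and `cleanup` — in the order the `src/lib.rs` docs describe. It is a hypothetical example, not code from this patch: the generic `device` and `props` stand in for whatever backend crate provides a `MemoryDevice<M>` implementation and `DeviceProperties`; no concrete backend is shown.

// Hypothetical usage sketch, not part of the vendored crate.
use gpu_alloc::{
    AllocationError, Config, DeviceProperties, GpuAllocator, MemoryDevice, Request, UsageFlags,
};

unsafe fn upload_scratch<M, D>(
    device: &D,
    props: DeviceProperties<'_>,
) -> Result<(), AllocationError>
where
    M: Clone + core::fmt::Debug + 'static,
    D: MemoryDevice<M>,
{
    // Starting-point configuration; real applications should tune the thresholds.
    let mut allocator = GpuAllocator::<M>::new(Config::i_am_prototyping(), props);

    // 64 KiB of host-writable memory, 256-byte aligned, from any memory type.
    let block = allocator.alloc(
        device,
        Request {
            size: 64 * 1024,
            align_mask: 255,
            usage: UsageFlags::UPLOAD, // implies HOST_ACCESS
            memory_types: !0,
        },
    )?;

    // Transient map-and-copy; `write_bytes` flushes non-coherent memory itself.
    block
        .write_bytes(device, 0, &[0u8; 16])
        .expect("UPLOAD memory must be host-visible");

    // Blocks must be returned to the allocator, and leftover chunks freed
    // with `cleanup` before the allocator is dropped.
    allocator.dealloc(device, block);
    allocator.cleanup(device);
    Ok(())
}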