diff options
Diffstat (limited to 'third_party/rust/gpu-alloc')
-rw-r--r-- | third_party/rust/gpu-alloc/.cargo-checksum.json | 1 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/Cargo.toml | 61 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/README.md | 51 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/src/allocator.rs | 611 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/src/block.rs | 327 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/src/buddy.rs | 460 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/src/config.rs | 77 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/src/error.rs | 116 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/src/freelist.rs | 528 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/src/heap.rs | 32 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/src/lib.rs | 124 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/src/slab.rs | 97 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/src/usage.rs | 176 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/src/util.rs | 44 |
14 files changed, 2705 insertions, 0 deletions
diff --git a/third_party/rust/gpu-alloc/.cargo-checksum.json b/third_party/rust/gpu-alloc/.cargo-checksum.json new file mode 100644 index 0000000000..51a8d5b201 --- /dev/null +++ b/third_party/rust/gpu-alloc/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"3647062a9589eedcc361f51f1e1f0852ae6be3c028d0dc8f8d3c0c944bbca61e","README.md":"5a96b135d18e172f090bd6147830ab3a1b5cefac5f02be28f06cf88eea61b64f","src/allocator.rs":"28f20bbfc3866b4eb94a025027925268178921393f5ab8eb5febad1acf94dce8","src/block.rs":"3553343eb171e7ef8b771a1582489cbbbe22b3a1aca4d54b9ff0174fd30bf0da","src/buddy.rs":"253df5f689b643cf97cfd27be6512ea84b84d3c2097a35f3f2d73f537ab353b3","src/config.rs":"1851264db7576f92f3b455e4667339e1c48b7f1b88f8fae7be95c6a9badde455","src/error.rs":"50e30bccc4ba3b23d99298a65155eec45f7d91b66773e828460907ebdcb8ee41","src/freelist.rs":"92c9241d899f3e92a70b0eb5af0b557a3a76aa14c4a6d23876c3ed7857b5f15b","src/heap.rs":"347c25a8560d39d1abb807fb5c34a6a51d4786c0be24a92293f9c8217bace340","src/lib.rs":"07077fb465fb471b39db99bb0311082133d1c2044c2e192d20f7bbb9e743dd0b","src/slab.rs":"6a2b818087850bd5615b085d912490dbd46bbca0df28daa86932505b3983c64a","src/usage.rs":"99009f2ff532904de3ef66520336cd25bd1b795afcb0158385e78a5908f8308f","src/util.rs":"ce3fd7a278eb4d4bd030d4db5495313f56dc91b0765c394f88d126f11c2b4e75"},"package":"fbcd2dba93594b227a1f57ee09b8b9da8892c34d55aa332e034a228d0fe6a171"}
\ No newline at end of file diff --git a/third_party/rust/gpu-alloc/Cargo.toml b/third_party/rust/gpu-alloc/Cargo.toml new file mode 100644 index 0000000000..6021c6cbf2 --- /dev/null +++ b/third_party/rust/gpu-alloc/Cargo.toml @@ -0,0 +1,61 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2018" +name = "gpu-alloc" +version = "0.6.0" +authors = ["Zakarum <zakarumych@ya.ru>"] +description = "Implementation agnostic memory allocator for Vulkan like APIs" +homepage = "https://github.com/zakarumych/gpu-alloc" +documentation = "https://docs.rs/gpu-alloc-types" +readme = "README.md" +keywords = [ + "gpu", + "vulkan", + "allocation", + "no-std", +] +categories = [ + "graphics", + "memory-management", + "no-std", + "game-development", +] +license = "MIT OR Apache-2.0" +repository = "https://github.com/zakarumych/gpu-alloc" + +[dependencies.bitflags] +version = "2.0" +default-features = false + +[dependencies.gpu-alloc-types] +version = "=0.3.0" + +[dependencies.serde] +version = "1.0" +features = ["derive"] +optional = true +default-features = false + +[dependencies.tracing] +version = "0.1.27" +features = ["attributes"] +optional = true +default-features = false + +[features] +default = ["std"] +serde = [ + "dep:serde", + "bitflags/serde", +] +std = [] diff --git a/third_party/rust/gpu-alloc/README.md b/third_party/rust/gpu-alloc/README.md new file mode 100644 index 0000000000..8a93d685c1 --- /dev/null +++ b/third_party/rust/gpu-alloc/README.md @@ -0,0 +1,51 @@ +# gpu-alloc
+
+[![crates](https://img.shields.io/crates/v/gpu-alloc.svg?style=for-the-badge&label=gpu-alloc)](https://crates.io/crates/gpu-alloc)
+[![docs](https://img.shields.io/badge/docs.rs-gpu--alloc-66c2a5?style=for-the-badge&labelColor=555555&logoColor=white)](https://docs.rs/gpu-alloc)
+[![actions](https://img.shields.io/github/workflow/status/zakarumych/gpu-alloc/Rust/main?style=for-the-badge)](https://github.com/zakarumych/gpu-alloc/actions?query=workflow%3ARust)
+[![MIT/Apache](https://img.shields.io/badge/license-MIT%2FApache-blue.svg?style=for-the-badge)](COPYING)
+![loc](https://img.shields.io/tokei/lines/github/zakarumych/gpu-alloc?style=for-the-badge)
+
+
+Implementation agnostic memory allocator for Vulkan like APIs.
+
+This crate is intended to be used as part of safe API implementations.\
+Use with caution. There are unsafe functions all over the place.
+
+## Usage
+
+Start with fetching `DeviceProperties` from `gpu-alloc-<backend>` crate for the backend of choice.\
+Then create `GpuAllocator` instance and use it for all device memory allocations.\
+`GpuAllocator` will take care of all necessary bookkeeping, such as the memory object count limit,
+heap budget and memory mapping.
+
+#### Backend implementations
+
+Backend supporting crates should not depend on this crate.\
+Instead they should depend on `gpu-alloc-types` which is much more stable,
+allowing you to upgrade the `gpu-alloc` version without upgrading `gpu-alloc-<backend>`.
+
+
+Supported Rust Versions
+
+The minimum supported version is 1.40.
+The current version is not guaranteed to build on Rust versions earlier than the minimum supported version.
+
+`gpu-alloc-erupt` crate requires version 1.48 or higher due to dependency on `erupt` crate.
+
+## License
+
+Licensed under either of
+
+* Apache License, Version 2.0, ([license/APACHE](license/APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
+* MIT license ([license/MIT](license/MIT) or http://opensource.org/licenses/MIT)
+
+at your option.
+
+## Contributions
+
+Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.
+
+## Donate
+
+[![Become a patron](https://c5.patreon.com/external/logo/become_a_patron_button.png)](https://www.patreon.com/zakarum)
diff --git a/third_party/rust/gpu-alloc/src/allocator.rs b/third_party/rust/gpu-alloc/src/allocator.rs new file mode 100644 index 0000000000..3f862ce565 --- /dev/null +++ b/third_party/rust/gpu-alloc/src/allocator.rs @@ -0,0 +1,611 @@ +use {
+ crate::{
+ align_down,
+ block::{MemoryBlock, MemoryBlockFlavor},
+ buddy::{BuddyAllocator, BuddyBlock},
+ config::Config,
+ error::AllocationError,
+ freelist::{FreeListAllocator, FreeListBlock},
+ heap::Heap,
+ usage::{MemoryForUsage, UsageFlags},
+ MemoryBounds, Request,
+ },
+ alloc::boxed::Box,
+ core::convert::TryFrom as _,
+ gpu_alloc_types::{
+ AllocationFlags, DeviceProperties, MemoryDevice, MemoryPropertyFlags, MemoryType,
+ OutOfMemory,
+ },
+};
+
+/// Memory allocator for Vulkan-like APIs. Serves requests with dedicated memory objects or through per-memory-type buddy and free-list sub-allocators that are created on first use.
+#[derive(Debug)]
+pub struct GpuAllocator<M> {
+ dedicated_threshold: u64,
+ preferred_dedicated_threshold: u64, // clamped in `new` to be at most `dedicated_threshold`
+ transient_dedicated_threshold: u64, // clamped in `new` to be at least `dedicated_threshold`
+ max_memory_allocation_size: u64, // requests larger than this are rejected up front
+ memory_for_usage: MemoryForUsage,
+ memory_types: Box<[MemoryType]>,
+ memory_heaps: Box<[Heap]>,
+ allocations_remains: u32, // memory objects that may still be allocated; decremented on allocation, incremented on free
+ non_coherent_atom_mask: u64, // `non_coherent_atom_size - 1`
+ starting_free_list_chunk: u64,
+ final_free_list_chunk: u64,
+ minimal_buddy_size: u64,
+ initial_buddy_dedicated_size: u64,
+ buffer_device_address: bool,
+
+ buddy_allocators: Box<[Option<BuddyAllocator<M>>]>, // indexed by memory type; `None` until first use
+ freelist_allocators: Box<[Option<FreeListAllocator<M>>]>, // indexed by memory type; `None` until first use
+}
+
+/// Hint that lets the caller influence the allocation strategy; passed to `GpuAllocator::alloc_with_dedicated`.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+#[non_exhaustive]
+pub enum Dedicated {
+ /// Allocation directly from device.\
+ /// Very slow.\
+ /// Count of allocations is limited.\
+ /// Use with caution.\
+ /// Must be used if resource has to be bound to dedicated memory object.
+ Required,
+
+ /// Hint for allocator that dedicated memory object is preferred.\
+ /// Should be used if it is known that resource placed in dedicated memory object
+ /// would allow for better performance.\
+ /// The allocator may still return a block sub-allocated from a shared memory object: the hint only forces a dedicated object for requests at or above the preferred dedicated threshold.
+ Preferred,
+}
+
+impl<M> GpuAllocator<M>
+where
+ M: MemoryBounds + 'static,
+{
+ /// Creates new instance of `GpuAllocator`.
+ /// Provided `DeviceProperties` should match properties of `MemoryDevice` that will be used
+ /// with created `GpuAllocator` instance. Panics if `props.non_coherent_atom_size` is not a power of two or does not fit into `isize`.
+ #[cfg_attr(feature = "tracing", tracing::instrument)]
+ pub fn new(config: Config, props: DeviceProperties<'_>) -> Self {
+ assert!(
+ props.non_coherent_atom_size.is_power_of_two(),
+ "`non_coherent_atom_size` must be power of two"
+ );
+
+ assert!(
+ isize::try_from(props.non_coherent_atom_size).is_ok(),
+ "`non_coherent_atom_size` must fit host address space"
+ );
+
+ GpuAllocator {
+ dedicated_threshold: config.dedicated_threshold,
+ preferred_dedicated_threshold: config
+ .preferred_dedicated_threshold
+ .min(config.dedicated_threshold), // never above `dedicated_threshold`
+
+ transient_dedicated_threshold: config
+ .transient_dedicated_threshold
+ .max(config.dedicated_threshold), // never below `dedicated_threshold`
+
+ max_memory_allocation_size: props.max_memory_allocation_size,
+
+ memory_for_usage: MemoryForUsage::new(props.memory_types.as_ref()),
+
+ memory_types: props.memory_types.as_ref().iter().copied().collect(),
+ memory_heaps: props
+ .memory_heaps
+ .as_ref()
+ .iter()
+ .map(|heap| Heap::new(heap.size))
+ .collect(),
+
+ buffer_device_address: props.buffer_device_address,
+
+ allocations_remains: props.max_memory_allocation_count,
+ non_coherent_atom_mask: props.non_coherent_atom_size - 1, // low-bit mask; the size is a power of two (asserted above)
+
+ starting_free_list_chunk: config.starting_free_list_chunk,
+ final_free_list_chunk: config.final_free_list_chunk,
+ minimal_buddy_size: config.minimal_buddy_size,
+ initial_buddy_dedicated_size: config.initial_buddy_dedicated_size,
+
+ buddy_allocators: props.memory_types.as_ref().iter().map(|_| None).collect(), // one empty slot per memory type
+ freelist_allocators: props.memory_types.as_ref().iter().map(|_| None).collect(), // one empty slot per memory type
+ }
+ }
+
+ /// Allocates memory block from specified `device` according to the `request`, letting the allocator choose the allocation strategy.
+ ///
+ /// # Safety
+ ///
+ /// * `device` must be one with `DeviceProperties` that were provided to create this `GpuAllocator` instance.
+ /// * Same `device` instance must be used for all interactions with one `GpuAllocator` instance
+ /// and memory blocks allocated from it.
+ #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))]
+ pub unsafe fn alloc(
+ &mut self,
+ device: &impl MemoryDevice<M>,
+ request: Request,
+ ) -> Result<MemoryBlock<M>, AllocationError> {
+ self.alloc_internal(device, request, None) // `None`: no `Dedicated` hint from the caller
+ }
+
+ /// Allocates memory block from specified `device` according to the `request`.
+ /// This function allows the user to influence the allocation strategy through the `dedicated` hint.
+ /// Improper use can lead to suboptimal performance or too large overhead.
+ /// Prefer `GpuAllocator::alloc` if in doubt.
+ ///
+ /// # Safety
+ ///
+ /// * `device` must be one with `DeviceProperties` that were provided to create this `GpuAllocator` instance.
+ /// * Same `device` instance must be used for all interactions with one `GpuAllocator` instance
+ /// and memory blocks allocated from it.
+ #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))]
+ pub unsafe fn alloc_with_dedicated(
+ &mut self,
+ device: &impl MemoryDevice<M>,
+ request: Request,
+ dedicated: Dedicated,
+ ) -> Result<MemoryBlock<M>, AllocationError> {
+ self.alloc_internal(device, request, Some(dedicated)) // same path as `alloc`, with the caller's hint
+ }
+
+ unsafe fn alloc_internal( // shared implementation of `alloc` and `alloc_with_dedicated`; `dedicated` is `None` when the caller gave no hint
+ &mut self,
+ device: &impl MemoryDevice<M>,
+ mut request: Request,
+ dedicated: Option<Dedicated>,
+ ) -> Result<MemoryBlock<M>, AllocationError> {
+ enum Strategy {
+ Buddy,
+ Dedicated,
+ FreeList,
+ }
+
+ request.usage = with_implicit_usage_flags(request.usage);
+
+ if request.usage.contains(UsageFlags::DEVICE_ADDRESS) {
+ assert!(self.buffer_device_address, "`DEVICE_ADDRESS` cannot be requested when `DeviceProperties::buffer_device_address` is false");
+ }
+
+ if request.size > self.max_memory_allocation_size {
+ return Err(AllocationError::OutOfDeviceMemory);
+ }
+
+ if let Some(Dedicated::Required) = dedicated { // fail fast: a required dedicated object always consumes one allocation
+ if self.allocations_remains == 0 {
+ return Err(AllocationError::TooManyObjects);
+ }
+ }
+
+ if 0 == self.memory_for_usage.mask(request.usage) & request.memory_types {
+ #[cfg(feature = "tracing")]
+ tracing::error!(
+ "Cannot serve request {:?}, no memory among bitset `{}` support usage {:?}",
+ request,
+ request.memory_types,
+ request.usage
+ );
+
+ return Err(AllocationError::NoCompatibleMemoryTypes);
+ }
+
+ let transient = request.usage.contains(UsageFlags::TRANSIENT);
+
+ for &index in self.memory_for_usage.types(request.usage) { // candidates are tried in the order `memory_for_usage` yields them
+ if 0 == request.memory_types & (1 << index) {
+ // Skip memory type incompatible with the request.
+ continue;
+ }
+
+ let memory_type = &self.memory_types[index as usize];
+ let heap = memory_type.heap;
+ let heap = &mut self.memory_heaps[heap as usize];
+
+ if request.size > heap.size() {
+ // Impossible to use memory type from this heap.
+ continue;
+ }
+
+ let atom_mask = if host_visible_non_coherent(memory_type.props) {
+ self.non_coherent_atom_mask
+ } else {
+ 0
+ };
+
+ let flags = if self.buffer_device_address {
+ AllocationFlags::DEVICE_ADDRESS
+ } else {
+ AllocationFlags::empty()
+ };
+
+ let strategy = match (dedicated, transient) { // hint first, then size thresholds: transient -> free list, otherwise buddy
+ (Some(Dedicated::Required), _) => Strategy::Dedicated,
+ (Some(Dedicated::Preferred), _)
+ if request.size >= self.preferred_dedicated_threshold =>
+ {
+ Strategy::Dedicated
+ }
+ (_, true) => {
+ let threshold = self.transient_dedicated_threshold.min(heap.size() / 32);
+
+ if request.size < threshold {
+ Strategy::FreeList
+ } else {
+ Strategy::Dedicated
+ }
+ }
+ (_, false) => {
+ let threshold = self.dedicated_threshold.min(heap.size() / 32);
+
+ if request.size < threshold {
+ Strategy::Buddy
+ } else {
+ Strategy::Dedicated
+ }
+ }
+ };
+
+ match strategy {
+ Strategy::Dedicated => { // the early count check above only covers `Dedicated::Required`; this branch is also reached via thresholds and `Preferred`
+ #[cfg(feature = "tracing")]
+ tracing::debug!(
+ "Allocating memory object `{}@{:?}`",
+ request.size,
+ memory_type
+ );
+ if self.allocations_remains == 0 { return Err(AllocationError::TooManyObjects); } // without this guard the decrement below underflows (debug panic / release wrap to `u32::MAX`)
+ match device.allocate_memory(request.size, index, flags) {
+ Ok(memory) => {
+ self.allocations_remains -= 1;
+ heap.alloc(request.size);
+
+ return Ok(MemoryBlock::new(
+ index,
+ memory_type.props,
+ 0,
+ request.size,
+ atom_mask,
+ MemoryBlockFlavor::Dedicated { memory },
+ ));
+ }
+ Err(OutOfMemory::OutOfDeviceMemory) => continue, // try the next compatible memory type
+ Err(OutOfMemory::OutOfHostMemory) => {
+ return Err(AllocationError::OutOfHostMemory)
+ }
+ }
+ }
+ Strategy::FreeList => {
+ let allocator = match &mut self.freelist_allocators[index as usize] {
+ Some(allocator) => allocator,
+ slot => { // first use of this memory type: create its free-list allocator
+ let starting_free_list_chunk = match align_down(
+ self.starting_free_list_chunk.min(heap.size() / 32),
+ atom_mask,
+ ) {
+ 0 => atom_mask,
+ other => other,
+ };
+
+ let final_free_list_chunk = match align_down(
+ self.final_free_list_chunk
+ .max(self.starting_free_list_chunk)
+ .max(self.transient_dedicated_threshold)
+ .min(heap.size() / 32),
+ atom_mask,
+ ) {
+ 0 => atom_mask,
+ other => other,
+ };
+
+ slot.get_or_insert(FreeListAllocator::new(
+ starting_free_list_chunk,
+ final_free_list_chunk,
+ index,
+ memory_type.props,
+ if host_visible_non_coherent(memory_type.props) {
+ self.non_coherent_atom_mask
+ } else {
+ 0
+ },
+ ))
+ }
+ };
+ let result = allocator.alloc(
+ device,
+ request.size,
+ request.align_mask,
+ flags,
+ heap,
+ &mut self.allocations_remains,
+ );
+
+ match result {
+ Ok(block) => {
+ return Ok(MemoryBlock::new(
+ index,
+ memory_type.props,
+ block.offset,
+ block.size,
+ atom_mask,
+ MemoryBlockFlavor::FreeList {
+ chunk: block.chunk,
+ ptr: block.ptr,
+ memory: block.memory,
+ },
+ ))
+ }
+ Err(AllocationError::OutOfDeviceMemory) => continue, // try the next compatible memory type
+ Err(err) => return Err(err),
+ }
+ }
+
+ Strategy::Buddy => {
+ let allocator = match &mut self.buddy_allocators[index as usize] {
+ Some(allocator) => allocator,
+ slot => { // first use of this memory type: create its buddy allocator
+ let minimal_buddy_size = self
+ .minimal_buddy_size
+ .min(heap.size() / 1024)
+ .next_power_of_two();
+
+ let initial_buddy_dedicated_size = self
+ .initial_buddy_dedicated_size
+ .min(heap.size() / 32)
+ .next_power_of_two();
+
+ slot.get_or_insert(BuddyAllocator::new(
+ minimal_buddy_size,
+ initial_buddy_dedicated_size,
+ index,
+ memory_type.props,
+ if host_visible_non_coherent(memory_type.props) {
+ self.non_coherent_atom_mask
+ } else {
+ 0
+ },
+ ))
+ }
+ };
+ let result = allocator.alloc(
+ device,
+ request.size,
+ request.align_mask,
+ flags,
+ heap,
+ &mut self.allocations_remains,
+ );
+
+ match result {
+ Ok(block) => {
+ return Ok(MemoryBlock::new(
+ index,
+ memory_type.props,
+ block.offset,
+ block.size,
+ atom_mask,
+ MemoryBlockFlavor::Buddy {
+ chunk: block.chunk,
+ ptr: block.ptr,
+ index: block.index,
+ memory: block.memory,
+ },
+ ))
+ }
+ Err(AllocationError::OutOfDeviceMemory) => continue, // try the next compatible memory type
+ Err(err) => return Err(err),
+ }
+ }
+ }
+ }
+
+ Err(AllocationError::OutOfDeviceMemory) // every compatible memory type was skipped or ran out of device memory
+ }
+
+ /// Creates a memory block from an existing memory allocation, transferring ownership to the allocator.
+ ///
+ /// This function allows the [`GpuAllocator`] to manage memory allocated outside of the typical
+ /// [`GpuAllocator::alloc`] family of functions.
+ ///
+ /// # Usage
+ ///
+ /// If you need to import external memory, such as a Win32 `HANDLE` or a Linux `dmabuf`, import the device
+ /// memory using the graphics api and platform dependent functions. Once that is done, call this function
+ /// to make the [`GpuAllocator`] take ownership of the imported memory.
+ ///
+ /// When calling this function, you **must** ensure there are [enough remaining allocations](GpuAllocator::remaining_allocations).
+ ///
+ /// # Safety
+ ///
+ /// - The `memory` must be allocated with the same device that was provided to create this [`GpuAllocator`]
+ /// instance.
+ /// - The `memory` must be valid.
+ /// - The `props`, `offset` and `size` must match the properties, offset and size of the memory allocation.
+ /// - The memory must have been allocated with the specified `memory_type`; this function panics if `memory_type` is not a known memory type index.
+ /// - There must be enough remaining allocations; this function panics if none remain.
+ /// - The memory allocation must not come from an existing memory block created by this allocator.
+ /// - The underlying memory object must be deallocated using the returned [`MemoryBlock`] with
+ /// [`GpuAllocator::dealloc`].
+ pub unsafe fn import_memory(
+ &mut self,
+ memory: M,
+ memory_type: u32,
+ props: MemoryPropertyFlags,
+ offset: u64,
+ size: u64,
+ ) -> MemoryBlock<M> {
+ // Get the heap which the imported memory is from.
+ let heap = self
+ .memory_types
+ .get(memory_type as usize)
+ .expect("Invalid memory type specified when importing memory")
+ .heap;
+ let heap = &mut self.memory_heaps[heap as usize];
+
+ #[cfg(feature = "tracing")]
+ tracing::debug!(
+ "Importing memory object {:?} `{}@{:?}`",
+ memory,
+ size,
+ memory_type
+ );
+
+ assert_ne!(
+ self.allocations_remains, 0,
+ "Out of allocations when importing a memory block. Ensure you check GpuAllocator::remaining_allocations before import."
+ );
+ self.allocations_remains -= 1; // the imported object counts against the allocation limit
+
+ let atom_mask = if host_visible_non_coherent(props) {
+ self.non_coherent_atom_mask
+ } else {
+ 0
+ };
+
+ heap.alloc(size); // account the imported size against its heap
+
+ MemoryBlock::new(
+ memory_type,
+ props,
+ offset,
+ size,
+ atom_mask,
+ MemoryBlockFlavor::Dedicated { memory }, // imported memory is treated like a dedicated allocation
+ )
+ }
+
+ /// Deallocates memory block previously allocated from (or imported into) this `GpuAllocator` instance.
+ ///
+ /// # Safety
+ ///
+ /// * Memory block must have been allocated by this `GpuAllocator` instance
+ /// * `device` must be one with `DeviceProperties` that were provided to create this `GpuAllocator` instance
+ /// * Same `device` instance must be used for all interactions with one `GpuAllocator` instance
+ /// and memory blocks allocated from it
+ #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))]
+ pub unsafe fn dealloc(&mut self, device: &impl MemoryDevice<M>, block: MemoryBlock<M>) {
+ let memory_type = block.memory_type();
+ let offset = block.offset();
+ let size = block.size();
+ let flavor = block.deallocate(); // consumes the block and disarms its drop check
+ match flavor {
+ MemoryBlockFlavor::Dedicated { memory } => {
+ let heap = self.memory_types[memory_type as usize].heap;
+ device.deallocate_memory(memory);
+ self.allocations_remains += 1; // the memory object is gone, so one more allocation is available
+ self.memory_heaps[heap as usize].dealloc(size);
+ }
+ MemoryBlockFlavor::Buddy {
+ chunk,
+ ptr,
+ index,
+ memory,
+ } => {
+ let heap = self.memory_types[memory_type as usize].heap;
+ let heap = &mut self.memory_heaps[heap as usize];
+
+ let allocator = self.buddy_allocators[memory_type as usize]
+ .as_mut()
+ .expect("Allocator should exist");
+
+ allocator.dealloc( // the sub-allocator updates `heap` and the allocation counter itself
+ device,
+ BuddyBlock {
+ memory,
+ ptr,
+ offset,
+ size,
+ chunk,
+ index,
+ },
+ heap,
+ &mut self.allocations_remains,
+ );
+ }
+ MemoryBlockFlavor::FreeList { chunk, ptr, memory } => {
+ let heap = self.memory_types[memory_type as usize].heap;
+ let heap = &mut self.memory_heaps[heap as usize];
+
+ let allocator = self.freelist_allocators[memory_type as usize]
+ .as_mut()
+ .expect("Allocator should exist");
+
+ allocator.dealloc( // the sub-allocator updates `heap` and the allocation counter itself
+ device,
+ FreeListBlock {
+ memory,
+ ptr,
+ chunk,
+ offset,
+ size,
+ },
+ heap,
+ &mut self.allocations_remains,
+ );
+ }
+ }
+ }
+
+ /// Returns the maximum size of a single allocation, as given by `DeviceProperties::max_memory_allocation_size` at construction.
+ pub fn max_allocation_size(&self) -> u64 {
+ self.max_memory_allocation_size
+ }
+
+ /// Returns the number of remaining available allocations.
+ ///
+ /// This may be useful if you need to know whether the allocator can make a number of allocations ahead of
+ /// time. This function is also useful for ensuring you do not allocate too much memory outside the allocator
+ /// (such as external memory).
+ pub fn remaining_allocations(&self) -> u32 {
+ self.allocations_remains
+ }
+
+ /// Overwrites the number of remaining available allocations tracked by this allocator.
+ ///
+ /// # Safety
+ ///
+ /// The caller is responsible for ensuring the number of remaining allocations does not exceed how many
+ /// remaining allocations there actually are on the memory device.
+ pub unsafe fn set_remaining_allocations(&mut self, remaining: u32) {
+ self.allocations_remains = remaining;
+ }
+
+ /// Deallocates leftover memory objects held by the free-list allocators.
+ /// Should be used before dropping.
+ ///
+ /// # Safety
+ ///
+ /// * `device` must be one with `DeviceProperties` that were provided to create this `GpuAllocator` instance
+ /// * Same `device` instance must be used for all interactions with one `GpuAllocator` instance
+ /// and memory blocks allocated from it
+ #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))]
+ pub unsafe fn cleanup(&mut self, device: &impl MemoryDevice<M>) {
+ for (index, allocator) in self // only free-list allocators are visited; buddy allocators are not touched here
+ .freelist_allocators
+ .iter_mut()
+ .enumerate()
+ .filter_map(|(index, allocator)| Some((index, allocator.as_mut()?))) // skip memory types whose allocator was never created
+ {
+ let memory_type = &self.memory_types[index];
+ let heap = memory_type.heap;
+ let heap = &mut self.memory_heaps[heap as usize];
+
+ allocator.cleanup(device, heap, &mut self.allocations_remains);
+ }
+ }
+}
+
+/// Returns `true` when `props` has `HOST_VISIBLE` set and `HOST_COHERENT` clear.
+fn host_visible_non_coherent(props: MemoryPropertyFlags) -> bool {
+ let host_bits = MemoryPropertyFlags::HOST_COHERENT | MemoryPropertyFlags::HOST_VISIBLE;
+ (props & host_bits) == MemoryPropertyFlags::HOST_VISIBLE }
+
+/// Adds the usage flags implied by the request: empty usage becomes `FAST_DEVICE_ACCESS`, upload/download implies `HOST_ACCESS`.
+fn with_implicit_usage_flags(usage: UsageFlags) -> UsageFlags {
+ if usage.is_empty() {
+ return UsageFlags::FAST_DEVICE_ACCESS;
+ }
+ let transfers = UsageFlags::DOWNLOAD | UsageFlags::UPLOAD;
+ if usage.intersects(transfers) { return usage | UsageFlags::HOST_ACCESS; }
+ usage
+}
diff --git a/third_party/rust/gpu-alloc/src/block.rs b/third_party/rust/gpu-alloc/src/block.rs new file mode 100644 index 0000000000..03e31d9d52 --- /dev/null +++ b/third_party/rust/gpu-alloc/src/block.rs @@ -0,0 +1,327 @@ +use {
+ crate::{align_down, align_up, error::MapError},
+ alloc::sync::Arc,
+ core::{
+ convert::TryFrom as _,
+ ptr::{copy_nonoverlapping, NonNull},
+ // sync::atomic::{AtomicU8, Ordering::*},
+ },
+ gpu_alloc_types::{MappedMemoryRange, MemoryDevice, MemoryPropertyFlags},
+};
+
+#[derive(Debug)]
+struct Relevant; // drop guard embedded in `MemoryBlock`; `MemoryBlock::deallocate` forgets it so the report below is skipped
+
+impl Drop for Relevant {
+ fn drop(&mut self) {
+ report_error_on_drop!("Memory block wasn't deallocated"); // reached only when a block is dropped without going through `deallocate`
+ }
+}
+
+/// Memory block allocated by `GpuAllocator`.
+#[derive(Debug)]
+pub struct MemoryBlock<M> {
+ memory_type: u32, // index of the memory type of the parent memory object
+ props: MemoryPropertyFlags,
+ offset: u64, // byte offset from the start of the memory object to the start of this block
+ size: u64,
+ atom_mask: u64, // alignment mask applied to mapped/flushed/invalidated ranges; checked in `new` to fit `isize`
+ mapped: bool, // set by `map`, cleared by `unmap`
+ flavor: MemoryBlockFlavor<M>,
+ relevant: Relevant, // reports an error if the block is dropped instead of deallocated
+}
+
+impl<M> MemoryBlock<M> {
+ pub(crate) fn new( // builds an unmapped block; panics if `atom_mask` does not fit into `isize`
+ memory_type: u32,
+ props: MemoryPropertyFlags,
+ offset: u64,
+ size: u64,
+ atom_mask: u64,
+ flavor: MemoryBlockFlavor<M>,
+ ) -> Self {
+ isize::try_from(atom_mask).expect("`atom_mask` is too large"); // `map` later casts an offset bounded by this mask to `isize`
+ MemoryBlock {
+ memory_type,
+ props,
+ offset,
+ size,
+ atom_mask,
+ flavor,
+ mapped: false,
+ relevant: Relevant,
+ }
+ }
+
+ pub(crate) fn deallocate(self) -> MemoryBlockFlavor<M> { // consumes the block and hands its backing memory description to the allocator
+ core::mem::forget(self.relevant); // disarm the "wasn't deallocated" drop report
+ self.flavor
+ }
+}
+
+unsafe impl<M> Sync for MemoryBlock<M> where M: Sync {} // manual impl: the flavor stores `NonNull<u8>`, which is not auto-`Sync`. NOTE(review): soundness relies on how the mapped pointer is shared — confirm
+unsafe impl<M> Send for MemoryBlock<M> where M: Send {} // manual impl: the flavor stores `NonNull<u8>`, which is not auto-`Send`. NOTE(review): confirm as above
+
+#[derive(Debug)]
+pub(crate) enum MemoryBlockFlavor<M> { // how a `MemoryBlock` is backed; decides the path taken by `map`, `unmap` and `GpuAllocator::dealloc`
+ Dedicated { // the block owns the whole memory object
+ memory: M,
+ },
+ Buddy { // block sub-allocated by a `BuddyAllocator`; the memory object is shared through `Arc`
+ chunk: usize,
+ index: usize,
+ ptr: Option<NonNull<u8>>, // host pointer used by `map`; `None` makes `map` return `NonHostVisible`
+ memory: Arc<M>,
+ },
+ FreeList { // block sub-allocated by a `FreeListAllocator`; the memory object is shared through `Arc`
+ chunk: u64,
+ ptr: Option<NonNull<u8>>, // host pointer used by `map`; `None` makes `map` return `NonHostVisible`
+ memory: Arc<M>,
+ },
+}
+
+impl<M> MemoryBlock<M> {
+ /// Returns reference to parent memory object, whichever flavor backs this block.
+ #[inline(always)]
+ pub fn memory(&self) -> &M {
+ match &self.flavor {
+ MemoryBlockFlavor::Dedicated { memory } => memory,
+ MemoryBlockFlavor::Buddy { memory, .. } => memory, // `&Arc<M>` coerces to `&M`
+ MemoryBlockFlavor::FreeList { memory, .. } => memory, // `&Arc<M>` coerces to `&M`
+ }
+ }
+
+ /// Returns the offset in bytes from the start of the parent memory object to the start of this block.
+ #[inline(always)]
+ pub fn offset(&self) -> u64 {
+ self.offset
+ }
+
+ /// Returns the size of this memory block in bytes.
+ #[inline(always)]
+ pub fn size(&self) -> u64 {
+ self.size
+ }
+
+ /// Returns the memory property flags of the parent memory object.
+ #[inline(always)]
+ pub fn props(&self) -> MemoryPropertyFlags {
+ self.props
+ }
+
+ /// Returns the index of the memory type of the parent memory object.
+ #[inline(always)]
+ pub fn memory_type(&self) -> u32 {
+ self.memory_type
+ }
+
+ /// Returns pointer to mapped memory range of this block.
+ /// This block becomes mapped. Returns `MapError::AlreadyMapped` if it is already mapped and `MapError::NonHostVisible` if a sub-allocated block has no host pointer.
+ ///
+ /// The user of returned pointer must guarantee that any previously submitted command that writes to this range has completed
+ /// before the host reads from or writes to that range,
+ /// and that any previously submitted command that reads from that range has completed
+ /// before the host writes to that region.
+ /// If the device memory was allocated without the `HOST_COHERENT` property flag set,
+ /// these guarantees must be made for an extended range:
+ /// the user must round down the start of the range to the nearest multiple of `non_coherent_atom_size`,
+ /// and round the end of the range up to the nearest multiple of `non_coherent_atom_size`.
+ ///
+ /// # Panics
+ ///
+ /// This function panics if `offset` is not inside the block or `offset + size` exceeds the block size. An already mapped block does not panic; it yields `MapError::AlreadyMapped`.
+ ///
+ /// # Safety
+ ///
+ /// `block` must have been allocated from specified `device`.
+ #[inline(always)]
+ pub unsafe fn map(
+ &mut self,
+ device: &impl MemoryDevice<M>,
+ offset: u64,
+ size: usize,
+ ) -> Result<NonNull<u8>, MapError> {
+ let size_u64 = u64::try_from(size).expect("`size` doesn't fit device address space");
+ assert!(offset < self.size, "`offset` is out of memory block bounds");
+ assert!(
+ size_u64 <= self.size - offset,
+ "`offset + size` is out of memory block bounds"
+ );
+
+ let ptr = match &mut self.flavor {
+ MemoryBlockFlavor::Dedicated { memory } => { // dedicated memory is mapped on demand through the device
+ let end = align_up(offset + size_u64, self.atom_mask)
+ .expect("mapping end doesn't fit device address space");
+ let aligned_offset = align_down(offset, self.atom_mask);
+
+ if !acquire_mapping(&mut self.mapped) {
+ return Err(MapError::AlreadyMapped);
+ }
+ let result =
+ device.map_memory(memory, self.offset + aligned_offset, end - aligned_offset);
+
+ match result {
+ // the overflow of the `isize` cast below is ruled out by the `atom_mask` check in `Self::new()`
+ Ok(ptr) => {
+ let ptr_offset = (offset - aligned_offset) as isize;
+ ptr.as_ptr().offset(ptr_offset) // step from the aligned mapping start back to the requested offset
+ }
+ Err(err) => {
+ release_mapping(&mut self.mapped); // mapping failed: undo the flag set above
+ return Err(err.into());
+ }
+ }
+ }
+ MemoryBlockFlavor::FreeList { ptr: Some(ptr), .. }
+ | MemoryBlockFlavor::Buddy { ptr: Some(ptr), .. } => { // sub-allocated blocks already carry a host pointer; no device call is made
+ if !acquire_mapping(&mut self.mapped) {
+ return Err(MapError::AlreadyMapped);
+ }
+ let offset_isize = isize::try_from(offset)
+ .expect("Buddy and linear block should fit host address space");
+ ptr.as_ptr().offset(offset_isize)
+ }
+ _ => return Err(MapError::NonHostVisible), // sub-allocated block whose `ptr` is `None`
+ };
+
+ Ok(NonNull::new_unchecked(ptr))
+ }
+
+ /// Unmaps memory range of this block that was previously mapped with `MemoryBlock::map`.
+ /// This block becomes unmapped. Returns `true` if the block was mapped and is now unmapped.
+ ///
+ /// # Panics
+ ///
+ /// This function does not panic when the block is not currently mapped; it returns `false` and does nothing.
+ ///
+ /// # Safety
+ ///
+ /// `block` must have been allocated from specified `device`.
+ #[inline(always)]
+ pub unsafe fn unmap(&mut self, device: &impl MemoryDevice<M>) -> bool {
+ if !release_mapping(&mut self.mapped) {
+ return false;
+ }
+ match &mut self.flavor {
+ MemoryBlockFlavor::Dedicated { memory } => {
+ device.unmap_memory(memory);
+ }
+ MemoryBlockFlavor::Buddy { .. } => {} // no device call: `map` made none for this flavor either
+ MemoryBlockFlavor::FreeList { .. } => {} // no device call: `map` made none for this flavor either
+ }
+ true
+ }
+
+ /// Transiently maps block memory range and copies specified data
+ /// to the mapped memory range, flushing the range when the memory is not `HOST_COHERENT`.
+ ///
+ /// # Panics
+ ///
+ /// This function panics if `offset..offset + data.len()` is out of the block bounds. If the block is currently mapped, `MapError::AlreadyMapped` is returned instead.
+ ///
+ /// # Safety
+ ///
+ /// `block` must have been allocated from specified `device`.
+ /// The caller must guarantee that any previously submitted command that reads or writes to this range has completed.
+ #[inline(always)]
+ pub unsafe fn write_bytes(
+ &mut self,
+ device: &impl MemoryDevice<M>,
+ offset: u64,
+ data: &[u8],
+ ) -> Result<(), MapError> {
+ let size = data.len();
+ let ptr = self.map(device, offset, size)?;
+
+ copy_nonoverlapping(data.as_ptr(), ptr.as_ptr(), size);
+ let result = if !self.coherent() { // non-coherent memory: flush the atom-aligned range that was written
+ let aligned_offset = align_down(offset, self.atom_mask);
+ let end = align_up(offset + data.len() as u64, self.atom_mask).unwrap(); // same computation already succeeded inside `map` for dedicated blocks
+
+ device.flush_memory_ranges(&[MappedMemoryRange {
+ memory: self.memory(),
+ offset: self.offset + aligned_offset,
+ size: end - aligned_offset,
+ }])
+ } else {
+ Ok(())
+ };
+
+ self.unmap(device); // unmap even when the flush failed
+ result.map_err(Into::into)
+ }
+
+ /// Transiently maps block memory range and copies specified data
+ /// from the mapped memory range, invalidating the range first when the memory is not `HOST_COHERENT`.
+ ///
+ /// # Panics
+ ///
+ /// This function panics if `offset..offset + data.len()` is out of the block bounds. If the block is currently mapped, `MapError::AlreadyMapped` is returned instead.
+ ///
+ /// # Safety
+ ///
+ /// `block` must have been allocated from specified `device`.
+ /// The caller must guarantee that any previously submitted command that writes to this range has completed.
+ #[inline(always)]
+ pub unsafe fn read_bytes(
+ &mut self,
+ device: &impl MemoryDevice<M>,
+ offset: u64,
+ data: &mut [u8],
+ ) -> Result<(), MapError> {
+ #[cfg(feature = "tracing")]
+ {
+ if !self.cached() {
+ tracing::warn!("Reading from non-cached memory may be slow. Consider allocating HOST_CACHED memory block for host reads.")
+ }
+ }
+
+ let size = data.len();
+ let ptr = self.map(device, offset, size)?;
+ let result = if !self.coherent() { // non-coherent memory: invalidate the atom-aligned range before reading
+ let aligned_offset = align_down(offset, self.atom_mask);
+ let end = align_up(offset + data.len() as u64, self.atom_mask).unwrap(); // same computation already succeeded inside `map` for dedicated blocks
+
+ device.invalidate_memory_ranges(&[MappedMemoryRange {
+ memory: self.memory(),
+ offset: self.offset + aligned_offset,
+ size: end - aligned_offset,
+ }])
+ } else {
+ Ok(())
+ };
+ if result.is_ok() { // `data` is left untouched when invalidation failed
+ copy_nonoverlapping(ptr.as_ptr(), data.as_mut_ptr(), size);
+ }
+
+ self.unmap(device);
+ result.map_err(Into::into)
+ }
+
+ /// Tells whether the block's memory properties include `HOST_COHERENT`.
+ fn coherent(&self) -> bool {
+     let required = MemoryPropertyFlags::HOST_COHERENT;
+     self.props.contains(required)
+ }
+
+ /// Tells whether the block's memory properties include `HOST_CACHED`.
+ /// Only compiled with the `tracing` feature, where it gates a slow-read warning.
+ #[cfg(feature = "tracing")]
+ fn cached(&self) -> bool {
+     let required = MemoryPropertyFlags::HOST_CACHED;
+     self.props.contains(required)
+ }
+}
+
+ /// Marks the flag as mapped.
+ ///
+ /// Returns `true` if the flag was clear (the mapping was acquired by this call)
+ /// and `false` if it was already set; the flag is set afterwards in both cases.
+ fn acquire_mapping(mapped: &mut bool) -> bool {
+     let acquired = !*mapped;
+     *mapped = true;
+     acquired
+ }
+
+ /// Clears the mapped flag.
+ ///
+ /// Returns `true` if the flag was set (the mapping was released by this call)
+ /// and `false` if it was already clear; the flag is clear afterwards in both cases.
+ fn release_mapping(mapped: &mut bool) -> bool {
+     let released = *mapped;
+     *mapped = false;
+     released
+ }
diff --git a/third_party/rust/gpu-alloc/src/buddy.rs b/third_party/rust/gpu-alloc/src/buddy.rs new file mode 100644 index 0000000000..dea303b948 --- /dev/null +++ b/third_party/rust/gpu-alloc/src/buddy.rs @@ -0,0 +1,460 @@ +use {
+ crate::{
+ align_up, error::AllocationError, heap::Heap, slab::Slab, unreachable_unchecked,
+ util::try_arc_unwrap, MemoryBounds,
+ },
+ alloc::{sync::Arc, vec::Vec},
+ core::{convert::TryFrom as _, mem::replace, ptr::NonNull},
+ gpu_alloc_types::{AllocationFlags, DeviceMapError, MemoryDevice, MemoryPropertyFlags},
+};
+
+ /// Block handed out by `BuddyAllocator::alloc` and taken back by `BuddyAllocator::dealloc`.
+ #[derive(Debug)]
+ pub(crate) struct BuddyBlock<M> {
+     /// Shared handle to the memory object of the chunk this block lives in.
+     pub memory: Arc<M>,
+     /// Chunk mapping pointer advanced by `offset`; `None` when the chunk has no mapping pointer.
+     pub ptr: Option<NonNull<u8>>,
+     /// Offset of the block inside its chunk.
+     pub offset: u64,
+     /// Block size after rounding up to a power of two.
+     pub size: u64,
+     /// Index of the owning chunk in the allocator's chunk slab.
+     pub chunk: usize,
+     /// Pair index shifted left by one, with the low bit selecting the half of the pair.
+     pub index: usize,
+ }
+
+ // The raw `ptr` field suppresses the auto traits; they are restored here
+ // whenever the memory object type itself provides them.
+ unsafe impl<M: Sync> Sync for BuddyBlock<M> {}
+ unsafe impl<M: Send> Send for BuddyBlock<M> {}
+
+ /// Availability of a buddy pair within one size level.
+ #[derive(Clone, Copy, Debug)]
+ enum PairState {
+     /// Neither half of the pair is available for allocation.
+     Exhausted,
+     /// Exactly one half is free; the pair is a node of the level's ring of ready pairs.
+     Ready {
+         /// The half that is free.
+         ready: Side,
+         /// Index of the following ready pair in the ring.
+         next: usize,
+         /// Index of the preceding ready pair in the ring.
+         prev: usize,
+     },
+ }
+
+ impl PairState {
+     /// Stores `value` as the `next` link and returns the link it replaced.
+     ///
+     /// # Safety
+     ///
+     /// The state must be `Ready`; calling this on `Exhausted` hits `unreachable_unchecked`.
+     unsafe fn replace_next(&mut self, value: usize) -> usize {
+         if let PairState::Ready { next, .. } = self {
+             replace(next, value)
+         } else {
+             unreachable_unchecked()
+         }
+     }
+
+     /// Stores `value` as the `prev` link and returns the link it replaced.
+     ///
+     /// # Safety
+     ///
+     /// The state must be `Ready`; calling this on `Exhausted` hits `unreachable_unchecked`.
+     unsafe fn replace_prev(&mut self, value: usize) -> usize {
+         if let PairState::Ready { prev, .. } = self {
+             replace(prev, value)
+         } else {
+             unreachable_unchecked()
+         }
+     }
+ }
+
+ /// Selects one half of a buddy pair.
+ #[derive(Clone, Copy, Debug, PartialEq, Eq)]
+ enum Side {
+ /// Half at the pair's own offset; encoded as bit `0` in block indices.
+ Left,
+ /// Half one block size above the pair's offset; encoded as bit `1` in block indices.
+ Right,
+ }
+use Side::*;
+
+ /// Bookkeeping for one pair of buddy blocks of the same size.
+ #[derive(Debug)]
+ struct PairEntry {
+ // Whether one half is free, and the ring links if so.
+ state: PairState,
+ // Index of the chunk the pair belongs to.
+ chunk: usize,
+ // Offset of the left half inside the chunk.
+ offset: u64,
+ // Index of the block in the next larger size level that was split to form
+ // this pair; `None` when the pair covers a whole chunk.
+ parent: Option<usize>,
+ }
+
+ /// Result of acquiring one block from a `Size` level.
+ struct SizeBlockEntry {
+ // Index of the chunk the block belongs to.
+ chunk: usize,
+ // Offset of the block inside the chunk.
+ offset: u64,
+ // Pair index shifted left by one, low bit selecting the half (0 = left, 1 = right).
+ index: usize,
+ }
+
+ /// All buddy pairs of one block size.
+ #[derive(Debug)]
+ struct Size {
+ // Index of the pair served by the next `acquire`;
+ // any value `>= pairs.len()` means no pair is ready.
+ next_ready: usize,
+ // Storage for the pairs of this level.
+ pairs: Slab<PairEntry>,
+ }
+ /// Outcome of `Size::release`, telling the caller what to free next.
+ #[derive(Debug)]
+ enum Release {
+ /// The pair still has an allocated half; nothing more to do.
+ None,
+ /// Both halves are free and the pair was removed; release this block index in the next larger level.
+ Parent(usize),
+ /// Both halves of a pair without parent are free; the chunk with this index can be released.
+ Chunk(usize),
+ }
+
+impl Size {
+ /// Creates a level without pairs; `next_ready == pairs.len()` encodes "nothing ready".
+ fn new() -> Self {
+     let pairs = Slab::new();
+     Size {
+         next_ready: 0,
+         pairs,
+     }
+ }
+
+ /// Registers a new pair at `offset` of `chunk` and immediately hands out its left half,
+ /// leaving the right half as the only ready block of this level.
+ ///
+ /// # Safety
+ ///
+ /// No pair of this level may be ready when this is called
+ /// (`next_ready >= pairs.len()`); otherwise `unreachable_unchecked` is hit.
+ unsafe fn add_pair_and_acquire_left(
+     &mut self,
+     chunk: usize,
+     offset: u64,
+     parent: Option<usize>,
+ ) -> SizeBlockEntry {
+     if self.next_ready < self.pairs.len() {
+         unreachable_unchecked()
+     }
+
+     // The slab index is only known after insertion, so the pair is inserted
+     // in a placeholder state and linked to itself right after.
+     let pair = PairEntry {
+         state: PairState::Exhausted,
+         chunk,
+         offset,
+         parent,
+     };
+     let index = self.pairs.insert(pair);
+
+     self.pairs.get_unchecked_mut(index).state = PairState::Ready {
+         ready: Right, // Left is allocated.
+         next: index,
+         prev: index,
+     };
+     self.next_ready = index;
+
+     SizeBlockEntry {
+         chunk,
+         offset,
+         index: index << 1,
+     }
+ }
+
+ /// Takes the free half of the pair at `next_ready`, if this level has a ready pair.
+ ///
+ /// `size` is the block size of this level; it is only used to compute the
+ /// offset of a right half. Returns `None` when `next_ready` is out of bounds.
+ fn acquire(&mut self, size: u64) -> Option<SizeBlockEntry> {
+ if self.next_ready >= self.pairs.len() {
+ return None;
+ }
+
+ let ready = self.next_ready;
+
+ // In bounds per the check above; the slot is assumed to be occupied.
+ let entry = unsafe { self.pairs.get_unchecked_mut(ready) };
+ let chunk = entry.chunk;
+ let offset = entry.offset;
+
+ let bit = match entry.state {
+ PairState::Exhausted => unsafe { unreachable_unchecked() },
+ PairState::Ready { ready, next, prev } => {
+ // The only free half is being handed out, so the pair leaves the ready ring.
+ entry.state = PairState::Exhausted;
+
+ if prev == self.next_ready {
+ // The only ready entry.
+ debug_assert_eq!(next, self.next_ready);
+ self.next_ready = self.pairs.len();
+ } else {
+ // Unlink the pair: connect its neighbours to each other.
+ let prev_entry = unsafe { self.pairs.get_unchecked_mut(prev) };
+ let prev_next = unsafe { prev_entry.state.replace_next(next) };
+ debug_assert_eq!(prev_next, self.next_ready);
+
+ let next_entry = unsafe { self.pairs.get_unchecked_mut(next) };
+ let next_prev = unsafe { next_entry.state.replace_prev(prev) };
+ debug_assert_eq!(next_prev, self.next_ready);
+
+ self.next_ready = next;
+ }
+
+ // Inside this arm `ready` is the free side, shadowing the pair index above.
+ match ready {
+ Left => 0,
+ Right => 1,
+ }
+ }
+ };
+
+ // Here `ready` is the pair index again; the right half starts `size` bytes after the left.
+ Some(SizeBlockEntry {
+ chunk,
+ offset: offset + bit as u64 * size,
+ index: (ready << 1) | bit as usize,
+ })
+ }
+
+ /// Returns the block identified by `index` (pair index shifted left by one,
+ /// low bit selecting the half) to this level.
+ ///
+ /// * If the pair was exhausted, the released half becomes ready, the pair
+ /// joins the ring of ready pairs and `Release::None` is returned.
+ /// * If the other half was already free, the pair is removed and the caller
+ /// is told to release the parent block or the whole chunk.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the half being released is already marked free.
+ fn release(&mut self, index: usize) -> Release {
+ let side = match index & 1 {
+ 0 => Side::Left,
+ 1 => Side::Right,
+ _ => unsafe { unreachable_unchecked() },
+ };
+ let entry_index = index >> 1;
+
+ let len = self.pairs.len();
+
+ let entry = self.pairs.get_mut(entry_index);
+
+ let chunk = entry.chunk;
+ let offset = entry.offset;
+ let parent = entry.parent;
+
+ match entry.state {
+ PairState::Exhausted => {
+ if self.next_ready == len {
+ // No other ready pair: this pair forms a ring of one.
+ entry.state = PairState::Ready {
+ ready: side,
+ next: entry_index,
+ prev: entry_index,
+ };
+ self.next_ready = entry_index;
+ } else {
+ debug_assert!(self.next_ready < len);
+
+ // Insert the pair into the ring right before the current `next_ready` pair.
+ let next = self.next_ready;
+ let next_entry = unsafe { self.pairs.get_unchecked_mut(next) };
+ let prev = unsafe { next_entry.state.replace_prev(entry_index) };
+
+ let prev_entry = unsafe { self.pairs.get_unchecked_mut(prev) };
+ let prev_next = unsafe { prev_entry.state.replace_next(entry_index) };
+ debug_assert_eq!(prev_next, next);
+
+ let entry = unsafe { self.pairs.get_unchecked_mut(entry_index) };
+ entry.state = PairState::Ready {
+ ready: side,
+ next,
+ prev,
+ };
+ }
+ Release::None
+ }
+
+ PairState::Ready { ready, .. } if ready == side => {
+ panic!("Attempt to dealloate already free block")
+ }
+
+ PairState::Ready { next, prev, .. } => {
+ // The other half is free as well: the pair is dropped from this level.
+ unsafe {
+ self.pairs.remove_unchecked(entry_index);
+ }
+
+ if prev == entry_index {
+ // It was the only ready pair; mark the level as having none.
+ debug_assert_eq!(next, entry_index);
+ self.next_ready = self.pairs.len();
+ } else {
+ // Unlink the removed pair from the ring of ready pairs.
+ let prev_entry = unsafe { self.pairs.get_unchecked_mut(prev) };
+ let prev_next = unsafe { prev_entry.state.replace_next(next) };
+ debug_assert_eq!(prev_next, entry_index);
+
+ let next_entry = unsafe { self.pairs.get_unchecked_mut(next) };
+ let next_prev = unsafe { next_entry.state.replace_prev(prev) };
+ debug_assert_eq!(next_prev, entry_index);
+
+ self.next_ready = next;
+ }
+
+ match parent {
+ Some(parent) => Release::Parent(parent),
+ None => {
+ // A pair without parent starts at the beginning of its chunk.
+ debug_assert_eq!(offset, 0);
+ Release::Chunk(chunk)
+ }
+ }
+ }
+ }
+ }
+}
+
+ /// One memory object obtained from the device and subdivided by the buddy allocator.
+ #[derive(Debug)]
+ struct Chunk<M> {
+ // Memory object, shared with every block handed out from this chunk.
+ memory: Arc<M>,
+ // Pointer returned by `map_memory` for the whole chunk; `None` for non-host-visible memory.
+ ptr: Option<NonNull<u8>>,
+ // Size the memory object was allocated with.
+ size: u64,
+ }
+
+ /// Buddy allocator serving power-of-two sized blocks from device memory chunks.
+ #[derive(Debug)]
+ pub(crate) struct BuddyAllocator<M> {
+ // Smallest block size; asserted to be a power of two in `new`.
+ minimal_size: u64,
+ // Memory objects currently owned by the allocator.
+ chunks: Slab<Chunk<M>>,
+ // Size levels; level `i` manages blocks of `minimal_size << i` bytes.
+ sizes: Vec<Size>,
+ // Memory type passed to `allocate_memory`.
+ memory_type: u32,
+ // Property flags of the memory type; consulted for `HOST_VISIBLE`.
+ props: MemoryPropertyFlags,
+ // Mask OR-ed into every requested alignment mask; includes `minimal_size - 1`.
+ atom_mask: u64,
+ }
+
+ // `Chunk` holds a raw mapping pointer, which suppresses the auto traits;
+ // they are restored here whenever the memory object type provides them.
+ unsafe impl<M> Sync for BuddyAllocator<M> where M: Sync {}
+ unsafe impl<M> Send for BuddyAllocator<M> where M: Send {}
+
+impl<M> BuddyAllocator<M>
+where
+ M: MemoryBounds + 'static,
+{
+ /// Creates an allocator for `memory_type` with no chunks allocated yet.
+ ///
+ /// One size level is pre-created for every power of two from `minimal_size`
+ /// up to, but not including, `initial_dedicated_size`. `minimal_size - 1`
+ /// is folded into the stored atom mask.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `minimal_size` or `initial_dedicated_size` is not a power of two.
+ pub fn new(
+     minimal_size: u64,
+     initial_dedicated_size: u64,
+     memory_type: u32,
+     props: MemoryPropertyFlags,
+     atom_mask: u64,
+ ) -> Self {
+     assert!(
+         minimal_size.is_power_of_two(),
+         "Minimal allocation size of buddy allocator must be power of two"
+     );
+     assert!(
+         initial_dedicated_size.is_power_of_two(),
+         "Dedicated allocation size of buddy allocator must be power of two"
+     );
+
+     let minimal_log2 = minimal_size.trailing_zeros();
+     let dedicated_log2 = initial_dedicated_size.trailing_zeros();
+     // Saturates to zero levels when the dedicated size is below the minimal size.
+     let level_count = dedicated_log2.saturating_sub(minimal_log2) as usize;
+
+     let mut sizes = Vec::with_capacity(level_count);
+     for _ in 0..level_count {
+         sizes.push(Size::new());
+     }
+
+     BuddyAllocator {
+         minimal_size,
+         chunks: Slab::new(),
+         sizes,
+         memory_type,
+         props,
+         atom_mask: atom_mask | (minimal_size - 1),
+     }
+ }
+
+ /// Allocates a block of at least `size` bytes.
+ ///
+ /// The size is aligned with `align_mask | atom_mask`, rounded up to a power
+ /// of two and raised to `minimal_size`. A free block is searched from the
+ /// matching size level upwards; when the largest level has nothing ready a
+ /// new chunk twice that level's block size is allocated from `device`
+ /// (and mapped if the memory type is host visible). A larger block is then
+ /// split down level by level, always taking the left half.
+ ///
+ /// # Errors
+ ///
+ /// * `OutOfDeviceMemory` if the rounded size overflows, or the device reports it.
+ /// * `OutOfHostMemory` if the device reports it or mapping the new chunk fails.
+ /// * `TooManyObjects` if a new chunk is needed but `*allocations_remains` is zero.
+ ///
+ /// If mapping a freshly allocated chunk fails, the memory object is returned
+ /// to the device and `allocations_remains` / `heap` are restored before the
+ /// error is reported.
+ ///
+ /// # Safety
+ ///
+ /// Calls the unsafe `MemoryDevice` allocation and mapping functions; the
+ /// caller must uphold their contracts.
+ // NOTE(review): presumably the same `device` must be used for every call on
+ // this allocator, as documented for `FreeListAllocator::cleanup` -- confirm.
+ #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))]
+ pub unsafe fn alloc(
+     &mut self,
+     device: &impl MemoryDevice<M>,
+     size: u64,
+     align_mask: u64,
+     flags: AllocationFlags,
+     heap: &mut Heap,
+     allocations_remains: &mut u32,
+ ) -> Result<BuddyBlock<M>, AllocationError> {
+     let align_mask = align_mask | self.atom_mask;
+
+     // Buddy blocks are powers of two, so the aligned size is rounded up accordingly.
+     let size = align_up(size, align_mask)
+         .and_then(|size| size.checked_next_power_of_two())
+         .ok_or(AllocationError::OutOfDeviceMemory)?;
+
+     let size = size.max(self.minimal_size);
+
+     let size_index = size.trailing_zeros() - self.minimal_size.trailing_zeros();
+     let size_index =
+         usize::try_from(size_index).map_err(|_| AllocationError::OutOfDeviceMemory)?;
+
+     while self.sizes.len() <= size_index {
+         self.sizes.push(Size::new());
+     }
+
+     let host_visible = self.host_visible();
+
+     let mut candidate_size_index = size_index;
+
+     let (mut entry, entry_size_index) = loop {
+         let sizes_len = self.sizes.len();
+
+         let candidate_size_entry = &mut self.sizes[candidate_size_index];
+         let candidate_size = self.minimal_size << candidate_size_index;
+
+         if let Some(entry) = candidate_size_entry.acquire(candidate_size) {
+             break (entry, candidate_size_index);
+         }
+
+         if sizes_len == candidate_size_index + 1 {
+             // That's size of device allocation.
+             if *allocations_remains == 0 {
+                 return Err(AllocationError::TooManyObjects);
+             }
+
+             let chunk_size = self.minimal_size << (candidate_size_index + 1);
+             let mut memory = device.allocate_memory(chunk_size, self.memory_type, flags)?;
+             *allocations_remains -= 1;
+             heap.alloc(chunk_size);
+
+             let ptr = if host_visible {
+                 match device.map_memory(&mut memory, 0, chunk_size) {
+                     Ok(ptr) => Some(ptr),
+                     Err(err) => {
+                         // The chunk is unusable without its mapping: give the
+                         // memory object back and undo the bookkeeping so that
+                         // neither the object nor the allocation budget leaks.
+                         device.deallocate_memory(memory);
+                         *allocations_remains += 1;
+                         heap.dealloc(chunk_size);
+
+                         return Err(match err {
+                             DeviceMapError::OutOfDeviceMemory => {
+                                 AllocationError::OutOfDeviceMemory
+                             }
+                             DeviceMapError::MapFailed | DeviceMapError::OutOfHostMemory => {
+                                 AllocationError::OutOfHostMemory
+                             }
+                         });
+                     }
+                 }
+             } else {
+                 None
+             };
+
+             let chunk = self.chunks.insert(Chunk {
+                 memory: Arc::new(memory),
+                 ptr,
+                 size: chunk_size,
+             });
+
+             let entry = candidate_size_entry.add_pair_and_acquire_left(chunk, 0, None);
+
+             break (entry, candidate_size_index);
+         }
+
+         candidate_size_index += 1;
+     };
+
+     // Split the acquired block down to the requested level. Every level in
+     // between had nothing ready, which `add_pair_and_acquire_left` requires.
+     for size_index in (size_index..entry_size_index).rev() {
+         let size_entry = &mut self.sizes[size_index];
+         entry =
+             size_entry.add_pair_and_acquire_left(entry.chunk, entry.offset, Some(entry.index));
+     }
+
+     let chunk_entry = self.chunks.get_unchecked(entry.chunk);
+
+     debug_assert!(
+         entry
+             .offset
+             .checked_add(size)
+             .map_or(false, |end| end <= chunk_entry.size),
+         "Offset + size is not in chunk bounds"
+     );
+
+     Ok(BuddyBlock {
+         memory: chunk_entry.memory.clone(),
+         ptr: chunk_entry
+             .ptr
+             .map(|ptr| NonNull::new_unchecked(ptr.as_ptr().add(entry.offset as usize))),
+         offset: entry.offset,
+         size,
+         chunk: entry.chunk,
+         index: entry.index,
+     })
+ }
+
+ /// Takes `block` back.
+ ///
+ /// The block is released at its size level; whenever both halves of a pair
+ /// become free the release continues one level up. If this reaches a pair
+ /// without parent, the chunk's memory object is returned to `device` and
+ /// `allocations_remains` / `heap` are credited.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the chunk's memory is still shared after its last block was
+ /// released, or (via `Size::release`) if the block is already free.
+ ///
+ /// # Safety
+ ///
+ /// Calls the unsafe `MemoryDevice::deallocate_memory`; the caller must uphold its contract.
+ // NOTE(review): presumably `block` must come from this allocator and `device`
+ // must be the device it was allocated with -- confirm.
+ #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))]
+ pub unsafe fn dealloc(
+     &mut self,
+     device: &impl MemoryDevice<M>,
+     block: BuddyBlock<M>,
+     heap: &mut Heap,
+     allocations_remains: &mut u32,
+ ) {
+     debug_assert!(block.size.is_power_of_two());
+
+     let mut level = (block.size.trailing_zeros() - self.minimal_size.trailing_zeros()) as usize;
+     let mut index = block.index;
+
+     // Walk up the levels while whole pairs become free.
+     let chunk_index = loop {
+         match self.sizes[level].release(index) {
+             Release::None => return,
+             Release::Parent(parent) => {
+                 level += 1;
+                 index = parent;
+             }
+             Release::Chunk(chunk) => break chunk,
+         }
+     };
+
+     debug_assert_eq!(chunk_index, block.chunk);
+     debug_assert_eq!(
+         self.chunks.get(chunk_index).size,
+         self.minimal_size << (level + 1)
+     );
+     let chunk = self.chunks.remove(chunk_index);
+     // Drop the block's handle first so the chunk holds the only reference.
+     drop(block);
+
+     let memory =
+         try_arc_unwrap(chunk.memory).expect("Memory shared after last block deallocated");
+
+     device.deallocate_memory(memory);
+     *allocations_remains += 1;
+     heap.dealloc(chunk.size);
+ }
+
+ /// Tells whether the allocator's memory properties include `HOST_VISIBLE`.
+ fn host_visible(&self) -> bool {
+     let required = MemoryPropertyFlags::HOST_VISIBLE;
+     self.props.contains(required)
+ }
+}
diff --git a/third_party/rust/gpu-alloc/src/config.rs b/third_party/rust/gpu-alloc/src/config.rs new file mode 100644 index 0000000000..20d53a3d75 --- /dev/null +++ b/third_party/rust/gpu-alloc/src/config.rs @@ -0,0 +1,77 @@ +/// Configuration for [`GpuAllocator`]
+///
+/// [`GpuAllocator`]: type.GpuAllocator
+ #[derive(Clone, Copy, Debug, PartialEq, Eq)]
+ #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+ pub struct Config {
+     /// Size in bytes of request that will be served by dedicated memory object.
+     /// This value should be large enough to not exhaust memory object limit
+     /// and not use slow memory object allocation when it is not necessary.
+     pub dedicated_threshold: u64,
+
+     /// Size in bytes of request that will be served by dedicated memory object if preferred.
+     /// This value should be large enough to not exhaust memory object limit
+     /// and not use slow memory object allocation when it is not necessary.
+     ///
+     /// This won't make much sense if this value is larger than `dedicated_threshold`.
+     pub preferred_dedicated_threshold: u64,
+
+     /// Size in bytes of transient memory request that will be served by dedicated memory object.
+     /// This value should be large enough to not exhaust memory object limit
+     /// and not use slow memory object allocation when it is not necessary.
+     ///
+     /// This won't make much sense if this value is less than `dedicated_threshold`.
+     pub transient_dedicated_threshold: u64,
+
+     /// Size in bytes of first chunk in free-list allocator.
+     pub starting_free_list_chunk: u64,
+
+     /// Upper limit for size in bytes of chunks in free-list allocator.
+     pub final_free_list_chunk: u64,
+
+     /// Minimal size for buddy allocator.
+     pub minimal_buddy_size: u64,
+
+     /// Initial memory object size for buddy allocator.
+     /// If less than `minimal_buddy_size` then `minimal_buddy_size` is used instead.
+     pub initial_buddy_dedicated_size: u64,
+ }
+
+ impl Config {
+     /// Returns default configuration.
+     ///
+     /// This is not `Default` implementation to discourage usage outside of
+     /// prototyping.
+     ///
+     /// Proper configuration should depend on hardware and intended usage.\
+     /// But those values can be used as starting point.\
+     /// Note that they can simply not work for some platforms with lesser
+     /// memory capacity than today's "modern" GPU (year 2020).
+     ///
+     /// Every field is the corresponding [`Config::i_am_potato`] value times 1024.
+     pub fn i_am_prototyping() -> Self {
+         // Assume that today's modern GPU is made of 1024 potatoes.
+         const POTATOES: u64 = 1024;
+
+         let Config {
+             dedicated_threshold,
+             preferred_dedicated_threshold,
+             transient_dedicated_threshold,
+             starting_free_list_chunk,
+             final_free_list_chunk,
+             minimal_buddy_size,
+             initial_buddy_dedicated_size,
+         } = Config::i_am_potato();
+
+         Config {
+             dedicated_threshold: dedicated_threshold * POTATOES,
+             preferred_dedicated_threshold: preferred_dedicated_threshold * POTATOES,
+             transient_dedicated_threshold: transient_dedicated_threshold * POTATOES,
+             starting_free_list_chunk: starting_free_list_chunk * POTATOES,
+             final_free_list_chunk: final_free_list_chunk * POTATOES,
+             minimal_buddy_size: minimal_buddy_size * POTATOES,
+             initial_buddy_dedicated_size: initial_buddy_dedicated_size * POTATOES,
+         }
+     }
+
+     /// Returns default configuration for average sized potato.
+     pub fn i_am_potato() -> Self {
+         const KIB: u64 = 1024;
+
+         Config {
+             dedicated_threshold: 32 * KIB,
+             preferred_dedicated_threshold: KIB,
+             transient_dedicated_threshold: 128 * KIB,
+             starting_free_list_chunk: 8 * KIB,
+             final_free_list_chunk: 128 * KIB,
+             minimal_buddy_size: 1,
+             initial_buddy_dedicated_size: 8 * KIB,
+         }
+     }
+ }
diff --git a/third_party/rust/gpu-alloc/src/error.rs b/third_party/rust/gpu-alloc/src/error.rs new file mode 100644 index 0000000000..98c396b1fe --- /dev/null +++ b/third_party/rust/gpu-alloc/src/error.rs @@ -0,0 +1,116 @@ +use {
+ core::fmt::{self, Display},
+ gpu_alloc_types::{DeviceMapError, OutOfMemory},
+};
+
+ /// Enumeration of possible errors that may occur during memory allocation.
+ #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+ pub enum AllocationError {
+ /// Backend reported that device memory has been exhausted.\
+ /// Deallocating device memory from the same heap may increase the chance
+ /// that another allocation would succeed.
+ OutOfDeviceMemory,
+
+ /// Backend reported that host memory has been exhausted.\
+ /// Deallocating host memory may increase the chance that another allocation would succeed.
+ OutOfHostMemory,
+
+ /// Allocation request cannot be fulfilled because none of the memory types allowed
+ /// by the `Request.memory_types` mask is compatible with `Request.usage`.
+ NoCompatibleMemoryTypes,
+
+ /// Reached limit on allocated memory objects count.\
+ /// Deallocating device memory, especially dedicated memory blocks,
+ /// may increase the chance that another allocation would succeed.
+ ///
+ /// If this error is returned when memory heaps are far from exhausted
+ /// `Config` should be tweaked to allocate larger memory objects.
+ TooManyObjects,
+ }
+
+impl From<OutOfMemory> for AllocationError {
+ fn from(err: OutOfMemory) -> Self {
+ match err {
+ OutOfMemory::OutOfDeviceMemory => AllocationError::OutOfDeviceMemory,
+ OutOfMemory::OutOfHostMemory => AllocationError::OutOfHostMemory,
+ }
+ }
+}
+
+impl Display for AllocationError {
+ fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match self {
+ AllocationError::OutOfDeviceMemory => fmt.write_str("Device memory exhausted"),
+ AllocationError::OutOfHostMemory => fmt.write_str("Host memory exhausted"),
+ AllocationError::NoCompatibleMemoryTypes => fmt.write_str(
+ "No compatible memory types from requested types support requested usage",
+ ),
+ AllocationError::TooManyObjects => {
+ fmt.write_str("Reached limit on allocated memory objects count")
+ }
+ }
+ }
+}
+
+ // Only available with the `std` feature, since the impl names `std::error::Error`.
+ // The trait's default methods are used unchanged.
+ #[cfg(feature = "std")]
+ impl std::error::Error for AllocationError {}
+
+ /// Enumeration of possible errors that may occur during memory mapping.
+ #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+ pub enum MapError {
+ /// Backend reported that device memory has been exhausted.\
+ /// Deallocating device memory from the same heap may increase the chance
+ /// that another mapping would succeed.
+ OutOfDeviceMemory,
+
+ /// Backend reported that host memory has been exhausted.\
+ /// Deallocating host memory may increase the chance that another mapping would succeed.
+ OutOfHostMemory,
+
+ /// Attempt to map memory block with non-host-visible memory type.\
+ /// Make sure to include `UsageFlags::HOST_ACCESS` in the allocation request
+ /// when memory mapping is intended.
+ NonHostVisible,
+
+ /// Map failed for an implementation-specific reason.\
+ /// For the Vulkan backend this includes a failed attempt
+ /// to allocate large enough virtual address space.
+ MapFailed,
+
+ /// Mapping failed due to block being already mapped.
+ AlreadyMapped,
+ }
+
+ /// Maps each device mapping failure onto the `MapError` variant of the same name.
+ impl From<DeviceMapError> for MapError {
+     fn from(err: DeviceMapError) -> Self {
+         match err {
+             DeviceMapError::MapFailed => Self::MapFailed,
+             DeviceMapError::OutOfHostMemory => Self::OutOfHostMemory,
+             DeviceMapError::OutOfDeviceMemory => Self::OutOfDeviceMemory,
+         }
+     }
+ }
+
+ /// Maps each backend out-of-memory condition onto the `MapError` variant of the same name.
+ impl From<OutOfMemory> for MapError {
+     fn from(err: OutOfMemory) -> Self {
+         match err {
+             OutOfMemory::OutOfHostMemory => Self::OutOfHostMemory,
+             OutOfMemory::OutOfDeviceMemory => Self::OutOfDeviceMemory,
+         }
+     }
+ }
+
+impl Display for MapError {
+ fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match self {
+ MapError::OutOfDeviceMemory => fmt.write_str("Device memory exhausted"),
+ MapError::OutOfHostMemory => fmt.write_str("Host memory exhausted"),
+ MapError::MapFailed => fmt.write_str("Failed to map memory object"),
+ MapError::NonHostVisible => fmt.write_str("Impossible to map non-host-visible memory"),
+ MapError::AlreadyMapped => fmt.write_str("Block is already mapped"),
+ }
+ }
+}
+
+ // Only available with the `std` feature, since the impl names `std::error::Error`.
+ // The trait's default methods are used unchanged.
+ #[cfg(feature = "std")]
+ impl std::error::Error for MapError {}
diff --git a/third_party/rust/gpu-alloc/src/freelist.rs b/third_party/rust/gpu-alloc/src/freelist.rs new file mode 100644 index 0000000000..a6ece27be3 --- /dev/null +++ b/third_party/rust/gpu-alloc/src/freelist.rs @@ -0,0 +1,528 @@ +use {
+ crate::{
+ align_down, align_up,
+ error::AllocationError,
+ heap::Heap,
+ util::{arc_unwrap, is_arc_unique},
+ MemoryBounds,
+ },
+ alloc::{sync::Arc, vec::Vec},
+ core::{cmp::Ordering, ptr::NonNull},
+ gpu_alloc_types::{AllocationFlags, DeviceMapError, MemoryDevice, MemoryPropertyFlags},
+};
+
+ /// Advances the pointer by `size` bytes, passing `None` through unchanged.
+ ///
+ /// # Safety
+ ///
+ /// `size` must stay within the memory region that starts at `ptr` and must fit in `isize`.
+ unsafe fn opt_ptr_add(ptr: Option<NonNull<u8>>, size: u64) -> Option<NonNull<u8>> {
+     match ptr {
+         None => None,
+         Some(base) => {
+             // Size is within memory region started at `ptr`.
+             // size is within `chunk_size` that fits `isize`.
+             let advanced = base.as_ptr().offset(size as isize);
+             Some(NonNull::new_unchecked(advanced))
+         }
+     }
+ }
+
+ /// Free regions of all memory objects managed by a `FreeListAllocator`.
+ #[derive(Debug)]
+ pub(super) struct FreeList<M> {
+ // Free regions, searched with `FreeListRegion::cmp` (chunk id first, then start offset).
+ array: Vec<FreeListRegion<M>>,
+ // Incremented for every memory object added; the new value becomes its chunk id.
+ counter: u64,
+ }
+
+impl<M> FreeList<M> {
+ /// Creates a free list with no regions; chunk ids start from 1 on first use.
+ pub fn new() -> Self {
+     let array = Vec::new();
+     FreeList { counter: 0, array }
+ }
+
+ /// Registers a freshly allocated memory object of `memory_size` bytes as one
+ /// free region under a new chunk id and carves a block of `size` bytes out of it.
+ ///
+ /// `ptr` is the mapping pointer of the memory object, if it is mapped.
+ pub fn get_block_from_new_memory(
+     &mut self,
+     memory: Arc<M>,
+     memory_size: u64,
+     ptr: Option<NonNull<u8>>,
+     align_mask: u64,
+     size: u64,
+ ) -> FreeListBlock<M> {
+     debug_assert!(size <= memory_size);
+
+     // Chunk ids only grow, so the new region belongs at the end of the array.
+     self.counter += 1;
+     let region = FreeListRegion {
+         memory,
+         ptr,
+         chunk: self.counter,
+         start: 0,
+         end: memory_size,
+     };
+     self.array.push(region);
+
+     let last = self.array.len() - 1;
+     self.get_block_at(last, align_mask, size)
+ }
+
+ /// Carves a block of `size` bytes out of the last region in the array that
+ /// can hold it at the requested alignment; `None` if no region fits.
+ pub fn get_block(&mut self, align_mask: u64, size: u64) -> Option<FreeListBlock<M>> {
+     let index = self.array.iter().rposition(|region| {
+         // Blocks are cut from the end of a region: the aligned start must not
+         // fall below the region start.
+         region
+             .end
+             .checked_sub(size)
+             .map_or(false, |start| align_down(start, align_mask) >= region.start)
+     })?;
+
+     Some(self.get_block_at(index, align_mask, size))
+ }
+
+ /// Cuts a block from the end of the region at `index`.
+ ///
+ /// The block starts at the aligned-down position `size` bytes before the
+ /// region end and runs to the region end, so it may be larger than `size`.
+ /// If it would start at the region start, the whole region is removed and
+ /// returned as the block.
+ fn get_block_at(&mut self, index: usize, align_mask: u64, size: u64) -> FreeListBlock<M> {
+     let region = &mut self.array[index];
+     let aligned_start = align_down(region.end - size, align_mask);
+
+     if aligned_start <= region.start {
+         debug_assert_eq!(aligned_start, region.start);
+         return self.array.remove(index).into_block();
+     }
+
+     let block = FreeListBlock {
+         offset: aligned_start,
+         size: region.end - aligned_start,
+         chunk: region.chunk,
+         ptr: unsafe { opt_ptr_add(region.ptr, aligned_start - region.start) },
+         memory: region.memory.clone(),
+     };
+
+     // The region keeps everything in front of the block.
+     region.end = aligned_start;
+     block
+ }
+
+ /// Returns `block` to the free list, merging it with the free regions that
+ /// directly precede or follow it in the same chunk.
+ ///
+ /// Naming used below: a region "is a prefix block" match when the block ends
+ /// exactly where the region starts, and "is a suffix block" match when the
+ /// block starts exactly where the region ends.
+ ///
+ /// # Panics
+ ///
+ /// Panics if a region comparing equal to the block (same chunk and start
+ /// offset) is already in the list.
+ pub fn insert_block(&mut self, block: FreeListBlock<M>) {
+ match self.array.binary_search_by(|b| b.cmp(&block)) {
+ Ok(_) => {
+ panic!("Overlapping block found in free list");
+ }
+ // `index` is the insertion position and a region exists there,
+ // i.e. at least one region sorts after the block.
+ Err(index) if self.array.len() > index => match &mut self.array[..=index] {
+ [] => unreachable!(),
+ // `index == 0`: the block sorts before every region.
+ [next] => {
+ debug_assert!(!next.is_suffix_block(&block));
+
+ if next.is_prefix_block(&block) {
+ next.merge_prefix_block(block);
+ } else {
+ self.array.insert(0, FreeListRegion::from_block(block));
+ }
+ }
+ // The block sorts between `prev` and `next`.
+ [.., prev, next] => {
+ debug_assert!(!prev.is_prefix_block(&block));
+ debug_assert!(!next.is_suffix_block(&block));
+
+ if next.is_prefix_block(&block) {
+ next.merge_prefix_block(block);
+
+ // The block may have closed the gap between `prev` and `next`.
+ if prev.consecutive(&*next) {
+ let next = self.array.remove(index);
+ let prev = &mut self.array[index - 1];
+ prev.merge(next);
+ }
+ } else if prev.is_suffix_block(&block) {
+ prev.merge_suffix_block(block);
+ } else {
+ self.array.insert(index, FreeListRegion::from_block(block));
+ }
+ }
+ },
+ // The block sorts after every region (or the list is empty).
+ Err(_) => match &mut self.array[..] {
+ [] => self.array.push(FreeListRegion::from_block(block)),
+ [.., prev] => {
+ debug_assert!(!prev.is_prefix_block(&block));
+ if prev.is_suffix_block(&block) {
+ prev.merge_suffix_block(block);
+ } else {
+ self.array.push(FreeListRegion::from_block(block));
+ }
+ }
+ },
+ }
+ }
+
+ /// Removes the regions whose memory object passes `is_arc_unique` and yields
+ /// each as a `(memory, region end)` pair.
+ ///
+ /// With `keep_last` set, the last region of the array is never removed.
+ /// Returns `None` when no region qualifies.
+ pub fn drain(&mut self, keep_last: bool) -> Option<impl Iterator<Item = (M, u64)> + '_> {
+ // Time to deallocate
+
+ let len = self.array.len();
+
+ // Number of regions selected for removal so far.
+ let mut del = 0;
+ {
+ let regions = &mut self.array[..];
+
+ // Move the selected regions to the tail of the array while keeping
+ // the relative order of the remaining ones.
+ for i in 0..len {
+ if (i < len - 1 || !keep_last) && is_arc_unique(&mut regions[i].memory) {
+ del += 1;
+ } else if del > 0 {
+ regions.swap(i - del, i);
+ }
+ }
+ }
+
+ if del > 0 {
+ Some(self.array.drain(len - del..).map(move |region| {
+ // A removed region is expected to start at offset 0, so `end` is its full length.
+ debug_assert_eq!(region.start, 0);
+ (unsafe { arc_unwrap(region.memory) }, region.end)
+ }))
+ } else {
+ None
+ }
+ }
+}
+
+ /// Contiguous free range `[start, end)` inside one memory object.
+ #[derive(Debug)]
+ struct FreeListRegion<M> {
+ // Memory object, shared with the blocks cut from it.
+ memory: Arc<M>,
+ // Pointer to the mapped memory at `start`, if the memory object is mapped.
+ ptr: Option<NonNull<u8>>,
+ // Id of the memory object, assigned from `FreeList::counter`.
+ chunk: u64,
+ // Offset of the first free byte.
+ start: u64,
+ // Offset one past the last free byte.
+ end: u64,
+ }
+
+ // The raw `ptr` field suppresses the auto traits; they are restored here
+ // whenever the memory object type itself provides them.
+ unsafe impl<M> Sync for FreeListRegion<M> where M: Sync {}
+ unsafe impl<M> Send for FreeListRegion<M> where M: Send {}
+
+ impl<M> FreeListRegion<M> {
+     /// Orders the region relative to `block`: by chunk id first, then by start offset.
+     pub fn cmp(&self, block: &FreeListBlock<M>) -> Ordering {
+         let same_chunk = self.chunk == block.chunk;
+
+         debug_assert_eq!(Arc::ptr_eq(&self.memory, &block.memory), same_chunk);
+
+         if same_chunk {
+             debug_assert_eq!(
+                 Ord::cmp(&self.start, &block.offset),
+                 Ord::cmp(&self.end, &(block.offset + block.size)),
+                 "Free region {{ start: {}, end: {} }} overlaps with block {{ offset: {}, size: {} }}",
+                 self.start,
+                 self.end,
+                 block.offset,
+                 block.size,
+             );
+         }
+
+         self.chunk
+             .cmp(&block.chunk)
+             .then_with(|| self.start.cmp(&block.offset))
+     }
+
+     /// Turns a returned block into a free region covering the same range.
+     fn from_block(block: FreeListBlock<M>) -> Self {
+         let FreeListBlock {
+             memory,
+             ptr,
+             chunk,
+             offset,
+             size,
+         } = block;
+
+         FreeListRegion {
+             memory,
+             ptr,
+             chunk,
+             start: offset,
+             end: offset + size,
+         }
+     }
+
+     /// Turns the whole region into a block covering the same range.
+     fn into_block(self) -> FreeListBlock<M> {
+         let FreeListRegion {
+             memory,
+             ptr,
+             chunk,
+             start,
+             end,
+         } = self;
+
+         FreeListBlock {
+             memory,
+             ptr,
+             chunk,
+             offset: start,
+             size: end - start,
+         }
+     }
+
+     /// Tells whether `other` starts exactly where this region ends, within the same chunk.
+     fn consecutive(&self, other: &Self) -> bool {
+         if self.chunk == other.chunk {
+             debug_assert!(Arc::ptr_eq(&self.memory, &other.memory));
+
+             debug_assert_eq!(
+                 Ord::cmp(&self.start, &other.start),
+                 Ord::cmp(&self.end, &other.end)
+             );
+
+             self.end == other.start
+         } else {
+             false
+         }
+     }
+
+     /// Extends this region over `next`, which must directly follow it.
+     fn merge(&mut self, next: FreeListRegion<M>) {
+         debug_assert!(self.consecutive(&next));
+         self.end = next.end;
+     }
+
+     /// Tells whether `block` ends exactly where this region starts, within the same chunk.
+     fn is_prefix_block(&self, block: &FreeListBlock<M>) -> bool {
+         if self.chunk == block.chunk {
+             debug_assert!(Arc::ptr_eq(&self.memory, &block.memory));
+
+             debug_assert_eq!(
+                 Ord::cmp(&self.start, &block.offset),
+                 Ord::cmp(&self.end, &(block.offset + block.size))
+             );
+
+             self.start == (block.offset + block.size)
+         } else {
+             false
+         }
+     }
+
+     /// Extends the region backwards over `block`; the region then starts at
+     /// the block's offset and takes over the block's pointer.
+     fn merge_prefix_block(&mut self, block: FreeListBlock<M>) {
+         debug_assert!(self.is_prefix_block(&block));
+         self.ptr = block.ptr;
+         self.start = block.offset;
+     }
+
+     /// Tells whether `block` starts exactly where this region ends, within the same chunk.
+     fn is_suffix_block(&self, block: &FreeListBlock<M>) -> bool {
+         if self.chunk == block.chunk {
+             debug_assert!(Arc::ptr_eq(&self.memory, &block.memory));
+
+             debug_assert_eq!(
+                 Ord::cmp(&self.start, &block.offset),
+                 Ord::cmp(&self.end, &(block.offset + block.size))
+             );
+
+             self.end == block.offset
+         } else {
+             false
+         }
+     }
+
+     /// Extends the region forwards over `block`.
+     fn merge_suffix_block(&mut self, block: FreeListBlock<M>) {
+         debug_assert!(self.is_suffix_block(&block));
+         self.end += block.size;
+     }
+ }
+
+ /// Block handed out by the free-list allocator.
+ #[derive(Debug)]
+ pub struct FreeListBlock<M> {
+ /// Memory object the block was cut from, shared with the free list.
+ pub memory: Arc<M>,
+ /// Pointer to the mapped memory of the block, if the memory object is mapped.
+ pub ptr: Option<NonNull<u8>>,
+ /// Id of the memory object within the free list.
+ pub chunk: u64,
+ /// Offset of the block inside the memory object.
+ pub offset: u64,
+ /// Size of the block; may exceed the requested size because of alignment.
+ pub size: u64,
+ }
+
+ // The raw `ptr` field suppresses the auto traits; they are restored here
+ // whenever the memory object type itself provides them.
+ unsafe impl<M> Sync for FreeListBlock<M> where M: Sync {}
+ unsafe impl<M> Send for FreeListBlock<M> where M: Send {}
+
+ /// Allocator that cuts blocks from the ends of free regions of growing memory chunks.
+ #[derive(Debug)]
+ pub(crate) struct FreeListAllocator<M> {
+ // Free regions of all chunks.
+ freelist: FreeList<M>,
+ // Size of the next memory object to allocate; grows up to `final_chunk_size`.
+ chunk_size: u64,
+ // Upper limit for `chunk_size`.
+ final_chunk_size: u64,
+ // Memory type passed to `allocate_memory`.
+ memory_type: u32,
+ // Property flags of the memory type; consulted for `HOST_VISIBLE`.
+ props: MemoryPropertyFlags,
+ // Mask used to align sizes and OR-ed into requested alignment masks.
+ atom_mask: u64,
+
+ // Counters compared on drop to detect leaked or doubly freed blocks.
+ total_allocations: u64,
+ total_deallocations: u64,
+ }
+
+ /// Reports bookkeeping problems when the allocator is dropped: a mismatch
+ /// between allocation and deallocation counts, and free regions that were
+ /// not released by a prior `cleanup`.
+ impl<M> Drop for FreeListAllocator<M> {
+     fn drop(&mut self) {
+         let allocated = self.total_allocations;
+         let deallocated = self.total_deallocations;
+
+         if allocated > deallocated {
+             report_error_on_drop!("Not all blocks were deallocated");
+         } else if allocated < deallocated {
+             report_error_on_drop!("More blocks deallocated than allocated");
+         }
+
+         let has_leftovers = !self.freelist.array.is_empty();
+         if has_leftovers {
+             report_error_on_drop!(
+                 "FreeListAllocator has free blocks on drop. Allocator should be cleaned"
+             );
+         }
+     }
+ }
+
+impl<M> FreeListAllocator<M>
+where
+ M: MemoryBounds + 'static,
+{
+ /// Creates an allocator for `memory_type` without any memory allocated yet.
+ ///
+ /// Both chunk sizes are expected to be aligned to `atom_mask` (checked in
+ /// debug builds) and are clamped to `isize::max_value()`.
+ pub fn new(
+     starting_chunk_size: u64,
+     final_chunk_size: u64,
+     memory_type: u32,
+     props: MemoryPropertyFlags,
+     atom_mask: u64,
+ ) -> Self {
+     debug_assert_eq!(
+         align_down(starting_chunk_size, atom_mask),
+         starting_chunk_size
+     );
+     debug_assert_eq!(align_down(final_chunk_size, atom_mask), final_chunk_size);
+
+     // Chunk sizes are later used as pointer offsets, which must fit `isize`.
+     let chunk_size = min(starting_chunk_size, isize::max_value());
+     let final_chunk_size = min(final_chunk_size, isize::max_value());
+
+     FreeListAllocator {
+         freelist: FreeList::new(),
+         chunk_size,
+         final_chunk_size,
+         memory_type,
+         props,
+         atom_mask,
+
+         total_allocations: 0,
+         total_deallocations: 0,
+     }
+ }
+
+ /// Allocates a block of at least `size` bytes (after aligning the size to `atom_mask`).
+ ///
+ /// An existing free region is used when one fits. Otherwise a new memory
+ /// object of `chunk_size` bytes is allocated from `device` (growing
+ /// `chunk_size` first if the request is larger), mapped if the memory type is
+ /// host visible, and the block is cut from it. After a successful chunk
+ /// allocation `chunk_size` doubles, capped at `final_chunk_size`.
+ ///
+ /// # Errors
+ ///
+ /// * `TooManyObjects` if a new memory object is needed but `*allocations_remains` is zero.
+ /// * `OutOfDeviceMemory` / `OutOfHostMemory` as reported by the device;
+ ///   a failed mapping without a specific cause is reported as `OutOfHostMemory`.
+ ///
+ /// If mapping a freshly allocated memory object fails for any reason, the
+ /// object is returned to the device and `allocations_remains` / `heap` are
+ /// restored before the error is reported.
+ ///
+ /// # Panics
+ ///
+ /// Panics if aligning `size` overflows; in debug builds also if `size`
+ /// exceeds `final_chunk_size`.
+ ///
+ /// # Safety
+ ///
+ /// Calls the unsafe `MemoryDevice` allocation and mapping functions; the
+ /// caller must uphold their contracts.
+ #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))]
+ pub unsafe fn alloc(
+     &mut self,
+     device: &impl MemoryDevice<M>,
+     size: u64,
+     align_mask: u64,
+     flags: AllocationFlags,
+     heap: &mut Heap,
+     allocations_remains: &mut u32,
+ ) -> Result<FreeListBlock<M>, AllocationError> {
+     debug_assert!(
+         self.final_chunk_size >= size,
+         "GpuAllocator must not request allocations equal or greater to chunks size"
+     );
+
+     let size = align_up(size, self.atom_mask).expect(
+         "Any value not greater than final chunk size (which is aligned) has to fit for alignment",
+     );
+
+     let align_mask = align_mask | self.atom_mask;
+     let host_visible = self.host_visible();
+
+     if size <= self.chunk_size {
+         // Otherwise there can't be any sufficiently large free blocks
+         if let Some(block) = self.freelist.get_block(align_mask, size) {
+             self.total_allocations += 1;
+             return Ok(block);
+         }
+     }
+
+     // New allocation is required.
+     if *allocations_remains == 0 {
+         return Err(AllocationError::TooManyObjects);
+     }
+
+     if size > self.chunk_size {
+         // Grow the chunk size by a power-of-two factor large enough for the request.
+         let multiple = (size - 1) / self.chunk_size + 1;
+         let multiple = multiple.next_power_of_two();
+
+         self.chunk_size = (self.chunk_size * multiple).min(self.final_chunk_size);
+     }
+
+     let mut memory = device.allocate_memory(self.chunk_size, self.memory_type, flags)?;
+     *allocations_remains -= 1;
+     heap.alloc(self.chunk_size);
+
+     // Map host visible allocations
+     let ptr = if host_visible {
+         match device.map_memory(&mut memory, 0, self.chunk_size) {
+             Ok(ptr) => Some(ptr),
+             Err(err) => {
+                 #[cfg(feature = "tracing")]
+                 tracing::error!("Failed to map host-visible memory in free-list allocator");
+
+                 // Whatever the reason, the memory object is unusable without
+                 // its mapping: give it back and undo the bookkeeping so that
+                 // neither the object nor the allocation budget leaks.
+                 device.deallocate_memory(memory);
+                 *allocations_remains += 1;
+                 heap.dealloc(self.chunk_size);
+
+                 return Err(match err {
+                     DeviceMapError::OutOfDeviceMemory => AllocationError::OutOfDeviceMemory,
+                     DeviceMapError::MapFailed | DeviceMapError::OutOfHostMemory => {
+                         AllocationError::OutOfHostMemory
+                     }
+                 });
+             }
+         }
+     } else {
+         None
+     };
+
+     let memory = Arc::new(memory);
+     let block =
+         self.freelist
+             .get_block_from_new_memory(memory, self.chunk_size, ptr, align_mask, size);
+
+     if self.chunk_size < self.final_chunk_size {
+         // Double next chunk size
+         // Limit to final value.
+         self.chunk_size = (self.chunk_size * 2).min(self.final_chunk_size);
+     }
+
+     self.total_allocations += 1;
+     Ok(block)
+ }
+
+ /// Returns `block` to the free list and releases every memory object that
+ /// is no longer in use, except the one backing the last region of the list.
+ ///
+ /// # Safety
+ ///
+ /// Calls the unsafe `MemoryDevice::deallocate_memory`; the caller must uphold its contract.
+ // NOTE(review): presumably `block` must come from this allocator and `device`
+ // must be the device it was allocated with -- confirm.
+ #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))]
+ pub unsafe fn dealloc(
+     &mut self,
+     device: &impl MemoryDevice<M>,
+     block: FreeListBlock<M>,
+     heap: &mut Heap,
+     allocations_remains: &mut u32,
+ ) {
+     // A block can cover a whole chunk (alignment may round its start down to
+     // the chunk start), and once `chunk_size` has reached `final_chunk_size`
+     // it no longer grows past that chunk's size, so equality is legitimate.
+     debug_assert!(block.size <= self.chunk_size);
+     debug_assert_ne!(block.size, 0);
+     self.freelist.insert_block(block);
+     self.total_deallocations += 1;
+
+     if let Some(memory) = self.freelist.drain(true) {
+         memory.for_each(|(memory, size)| {
+             device.deallocate_memory(memory);
+             *allocations_remains += 1;
+             heap.dealloc(size);
+         });
+     }
+ }
+
+ /// Releases all memory objects that no block refers to any more, including
+ /// the one `dealloc` keeps around.
+ /// Should be used before dropping.
+ ///
+ /// # Safety
+ ///
+ /// * `device` must be one with `DeviceProperties` that were provided to create this `GpuAllocator` instance
+ /// * Same `device` instance must be used for all interactions with one `GpuAllocator` instance
+ ///   and memory blocks allocated from it
+ #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))]
+ pub unsafe fn cleanup(
+     &mut self,
+     device: &impl MemoryDevice<M>,
+     heap: &mut Heap,
+     allocations_remains: &mut u32,
+ ) {
+     if let Some(released) = self.freelist.drain(false) {
+         for (memory, size) in released {
+             device.deallocate_memory(memory);
+             *allocations_remains += 1;
+             heap.dealloc(size);
+         }
+     }
+
+     #[cfg(feature = "tracing")]
+     {
+         // With every block returned, no region should have survived the drain.
+         let all_blocks_returned = self.total_allocations == self.total_deallocations;
+         if all_blocks_returned && !self.freelist.array.is_empty() {
+             tracing::error!(
+                 "Some regions were not deallocated on cleanup, although all blocks are free.
+ This is a bug in `FreeBlockAllocator`.
+ See array of free blocks left:
+ {:#?}",
+                 self.freelist.array,
+             );
+         }
+     }
+ }
+
+ fn host_visible(&self) -> bool {
+ self.props.contains(MemoryPropertyFlags::HOST_VISIBLE)
+ }
+}
+
/// Returns the smaller of `l` and `r`, expressed in the type of `l`.
///
/// `r` is first converted into `L`. When that conversion fails, `l` is
/// returned unchanged.
fn min<L, R>(l: L, r: R) -> L
where
    R: core::convert::TryInto<L>,
    L: Ord,
{
    if let Ok(converted) = r.try_into() {
        core::cmp::min(l, converted)
    } else {
        l
    }
}
diff --git a/third_party/rust/gpu-alloc/src/heap.rs b/third_party/rust/gpu-alloc/src/heap.rs new file mode 100644 index 0000000000..f049f14768 --- /dev/null +++ b/third_party/rust/gpu-alloc/src/heap.rs @@ -0,0 +1,32 @@ +#[derive(Debug)]
/// Byte accounting for one memory heap.
pub(crate) struct Heap {
    /// Size value supplied to `Heap::new`.
    size: u64,
    /// Bytes added by `alloc` and not yet removed by `dealloc`.
    used: u64,
    /// Running total of every size passed to `alloc`.
    allocated: u128,
    /// Running total of every size passed to `dealloc`.
    deallocated: u128,
}

impl Heap {
    /// Creates accounting for a heap of `size` bytes with nothing in use.
    pub(crate) fn new(size: u64) -> Self {
        Heap {
            size,
            used: 0,
            allocated: 0,
            deallocated: 0,
        }
    }

    /// Returns the size value this heap was created with.
    ///
    /// Takes `&self`: reading the size does not need exclusive access.
    pub(crate) fn size(&self) -> u64 {
        self.size
    }

    /// Records that `size` bytes were allocated from this heap.
    pub(crate) fn alloc(&mut self, size: u64) {
        self.used += size;
        self.allocated += u128::from(size);
    }

    /// Records that `size` bytes were returned to this heap.
    ///
    /// Returning more than is currently in use is a bookkeeping bug: it
    /// trips an assertion in debug builds, and in release builds `used`
    /// is clamped to zero instead of wrapping around to a huge value.
    pub(crate) fn dealloc(&mut self, size: u64) {
        debug_assert!(
            self.used >= size,
            "Heap::dealloc of {} bytes exceeds {} bytes in use",
            size,
            self.used
        );
        self.used = self.used.saturating_sub(size);
        self.deallocated += u128::from(size);
    }
}
diff --git a/third_party/rust/gpu-alloc/src/lib.rs b/third_party/rust/gpu-alloc/src/lib.rs new file mode 100644 index 0000000000..3533f45124 --- /dev/null +++ b/third_party/rust/gpu-alloc/src/lib.rs @@ -0,0 +1,124 @@ +//!
+//! Implementation agnostic memory allocator for Vulkan like APIs.
+//!
+//! This crate is intended to be used as part of safe API implementations.\
+//! Use with caution. There are unsafe functions all over the place.
+//!
+//! # Usage
+//!
+//! Start with fetching `DeviceProperties` from `gpu-alloc-<backend>` crate for the backend of choice.\
+//! Then create `GpuAllocator` instance and use it for all device memory allocations.\
+//! `GpuAllocator` will take care of all necessary bookkeeping, such as the memory object count limit,
+//! heap budget and memory mapping.
+//!
+//! ### Backends implementations
+//!
+//! Backend supporting crates should not depend on this crate.\
+//! Instead they should depend on `gpu-alloc-types`, which is much more stable,
+//! allowing the `gpu-alloc` version to be upgraded without upgrading `gpu-alloc-<backend>`.
+//!
+
+#![cfg_attr(not(feature = "std"), no_std)]
+
+extern crate alloc;
+
// `report_error_on_drop!` takes `format!`-style arguments and has exactly one
// definition active, chosen by the `tracing` and `std` features.

// `tracing` enabled: the message goes to `tracing::error!`. When `std` is also
// enabled and the current thread is already panicking, the macro returns from
// the enclosing function without logging.
#[cfg(feature = "tracing")]
macro_rules! report_error_on_drop {
    ($($args:tt)*) => {{
        #[cfg(feature = "std")]
        {
            if std::thread::panicking() {
                return;
            }
        }

        tracing::error!($($args)*)
    }};
}

// No `tracing`, but `std`: the message is printed to stderr, unless the
// current thread is already panicking, in which case the macro returns from
// the enclosing function.
#[cfg(all(not(feature = "tracing"), feature = "std"))]
macro_rules! report_error_on_drop {
    ($($args:tt)*) => {{
        if std::thread::panicking() {
            return;
        }
        eprintln!($($args)*)
    }};
}

// Neither `tracing` nor `std`: the macro panics with the message.
#[cfg(all(not(feature = "tracing"), not(feature = "std")))]
macro_rules! report_error_on_drop {
    ($($args:tt)*) => {{
        panic!($($args)*)
    }};
}
+
+mod allocator;
+mod block;
+mod buddy;
+mod config;
+mod error;
+mod freelist;
+mod heap;
+mod slab;
+mod usage;
+mod util;
+
+pub use {
+ self::{allocator::*, block::MemoryBlock, config::*, error::*, usage::*},
+ gpu_alloc_types::*,
+};
+
+/// Memory request for allocator.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub struct Request {
+ /// Minimal size of memory block required.
+ /// Returned block may have larger size,
+ /// use `MemoryBlock::size` to learn actual size of returned block.
+ pub size: u64,
+
+ /// Minimal alignment mask required.
+ /// Returned block may have larger alignment,
+ /// use `MemoryBlock::align` to learn actual alignment of returned block.
+ pub align_mask: u64,
+
+ /// Intended memory usage.
+ /// Returned block may support additional usages,
+ /// use `MemoryBlock::props` to learn memory properties of returned block.
+ pub usage: UsageFlags,
+
+ /// Bitset for memory types.
+ /// Returned block will be from memory type corresponding to one of set bits,
+ /// use `MemoryBlock::memory_type` to learn memory type index of returned block.
+ pub memory_types: u32,
+}
+
/// Rounds `value` up according to `align_mask`.
///
/// Adds `align_mask` to `value` and clears every bit set in `align_mask`,
/// which yields the smallest aligned integer that is not less than `value`.
/// Returns `None` when the addition overflows.
pub(crate) fn align_up(value: u64, align_mask: u64) -> Option<u64> {
    value
        .checked_add(align_mask)
        .map(|bumped| bumped & !align_mask)
}
+
/// Rounds `value` down according to `align_mask`.
///
/// Clears every bit set in `align_mask`, which yields the largest aligned
/// integer that is not greater than `value`.
pub(crate) fn align_down(value: u64, align_mask: u64) -> u64 {
    let keep = !align_mask;
    value & keep
}
+
/// Marks a code path the caller guarantees can never execute.
///
/// With debug assertions enabled this panics through `unreachable!()`.
/// Without them it forwards to `core::hint::unreachable_unchecked`.
///
/// # Safety
///
/// The call must never actually be reached when debug assertions are
/// disabled.
#[allow(unused_unsafe)]
unsafe fn unreachable_unchecked() -> ! {
    if cfg!(debug_assertions) {
        unreachable!()
    } else {
        // SAFETY: the caller promises that this point is never reached.
        unsafe { core::hint::unreachable_unchecked() }
    }
}
+
+// #[cfg(feature = "tracing")]
+use core::fmt::Debug as MemoryBounds;
+
+// #[cfg(not(feature = "tracing"))]
+// use core::any::Any as MemoryBounds;
diff --git a/third_party/rust/gpu-alloc/src/slab.rs b/third_party/rust/gpu-alloc/src/slab.rs new file mode 100644 index 0000000000..de413435fe --- /dev/null +++ b/third_party/rust/gpu-alloc/src/slab.rs @@ -0,0 +1,97 @@ +use {crate::unreachable_unchecked, alloc::vec::Vec, core::mem::replace};
+
/// One slot of a `Slab`.
#[derive(Debug)]
enum Entry<T> {
    /// Free slot; holds the index of the next free slot in the chain.
    Vacant(usize),
    /// Slot that stores a value.
    Occupied(T),
}

/// Vector-backed storage that hands out stable indices and reuses the slots
/// of removed values.
///
/// Free slots form a chain: `next_vacant` is the most recently freed slot and
/// every `Entry::Vacant` stores the one freed before it. An index that is not
/// below `entries.len()` terminates the chain.
#[derive(Debug)]
pub(crate) struct Slab<T> {
    next_vacant: usize,
    entries: Vec<Entry<T>>,
}

impl<T> Slab<T> {
    /// Creates an empty slab with an empty free chain.
    pub fn new() -> Self {
        Slab {
            // All bits set: never a valid slot index, so the chain is empty.
            next_vacant: !0,
            entries: Vec::new(),
        }
    }

    /// Stores `value` and returns the index under which it can be
    /// accessed in constant time.
    ///
    /// The most recently freed slot is reused when there is one; otherwise
    /// the value is appended.
    pub fn insert(&mut self, value: T) -> usize {
        let slot = self.next_vacant;

        if slot >= self.entries.len() {
            // Free chain is empty: grow the vector.
            let index = self.entries.len();
            self.entries.push(Entry::Occupied(value));
            return index;
        }

        // SAFETY: `slot` was checked to be below `entries.len()` just above.
        let entry = unsafe { self.entries.get_unchecked_mut(slot) };
        let following = match *entry {
            Entry::Vacant(following) => following,
            // SAFETY: the free chain only ever links vacant slots.
            Entry::Occupied(_) => unsafe { unreachable_unchecked() },
        };

        *entry = Entry::Occupied(value);
        self.next_vacant = following;
        slot
    }

    /// Returns the number of slots, vacant ones included.
    pub fn len(&self) -> usize {
        self.entries.len()
    }

    /// Returns the value stored at `index` without any checks.
    ///
    /// # Safety
    ///
    /// `index` must be below `len()` and refer to an occupied slot.
    pub unsafe fn get_unchecked(&self, index: usize) -> &T {
        debug_assert!(index < self.len());

        if let Entry::Occupied(value) = self.entries.get_unchecked(index) {
            value
        } else {
            unreachable_unchecked()
        }
    }

    /// Returns the value stored at `index` mutably without any checks.
    ///
    /// # Safety
    ///
    /// `index` must be below `len()` and refer to an occupied slot.
    pub unsafe fn get_unchecked_mut(&mut self, index: usize) -> &mut T {
        debug_assert!(index < self.len());

        if let Entry::Occupied(value) = self.entries.get_unchecked_mut(index) {
            value
        } else {
            unreachable_unchecked()
        }
    }

    /// Returns the value stored at `index`.
    ///
    /// # Panics
    ///
    /// Panics when `index` is out of bounds or refers to a vacant slot.
    pub fn get(&self, index: usize) -> &T {
        if let Some(Entry::Occupied(value)) = self.entries.get(index) {
            value
        } else {
            panic!("Invalid index")
        }
    }

    /// Returns the value stored at `index` mutably.
    ///
    /// # Panics
    ///
    /// Panics when `index` is out of bounds or refers to a vacant slot.
    pub fn get_mut(&mut self, index: usize) -> &mut T {
        if let Some(Entry::Occupied(value)) = self.entries.get_mut(index) {
            value
        } else {
            panic!("Invalid index")
        }
    }

    /// Takes the value out of slot `index` and puts the slot at the head of
    /// the free chain, without any checks.
    ///
    /// # Safety
    ///
    /// `index` must be below `len()` and refer to an occupied slot.
    pub unsafe fn remove_unchecked(&mut self, index: usize) -> T {
        let vacated = Entry::Vacant(self.next_vacant);
        let previous = replace(self.entries.get_unchecked_mut(index), vacated);

        self.next_vacant = index;

        match previous {
            Entry::Occupied(value) => value,
            Entry::Vacant(_) => unreachable_unchecked(),
        }
    }

    /// Takes the value out of slot `index` and makes the slot reusable.
    ///
    /// # Panics
    ///
    /// Panics when `index` is out of bounds or refers to a vacant slot.
    pub fn remove(&mut self, index: usize) -> T {
        let occupied = match self.entries.get(index) {
            Some(Entry::Occupied(_)) => true,
            _ => false,
        };

        if !occupied {
            panic!("Invalid index");
        }

        // SAFETY: the slot was just verified to exist and to be occupied.
        unsafe { self.remove_unchecked(index) }
    }
}
diff --git a/third_party/rust/gpu-alloc/src/usage.rs b/third_party/rust/gpu-alloc/src/usage.rs new file mode 100644 index 0000000000..683c79d2fa --- /dev/null +++ b/third_party/rust/gpu-alloc/src/usage.rs @@ -0,0 +1,176 @@ +use {
+ core::fmt::{self, Debug},
+ gpu_alloc_types::{MemoryPropertyFlags, MemoryType},
+};
+
+bitflags::bitflags! {
+ /// Memory usage type.
+ /// Bits set define intended usage for requested memory.
+ #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+ #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+ pub struct UsageFlags: u8 {
+ /// Hints for allocator to find memory with faster device access.
+ /// If no flags is specified than `FAST_DEVICE_ACCESS` is implied.
+ const FAST_DEVICE_ACCESS = 0x01;
+
+ /// Memory will be accessed from host.
+ /// This flags guarantees that host memory operations will be available.
+ /// Otherwise implementation is encouraged to use non-host-accessible memory.
+ const HOST_ACCESS = 0x02;
+
+ /// Hints allocator that memory will be used for data downloading.
+ /// Allocator will strongly prefer host-cached memory.
+ /// Implies `HOST_ACCESS` flag.
+ const DOWNLOAD = 0x04;
+
+ /// Hints allocator that memory will be used for data uploading.
+ /// If `DOWNLOAD` flag is not set then allocator will assume that
+ /// host will access memory in write-only manner and may
+ /// pick not host-cached.
+ /// Implies `HOST_ACCESS` flag.
+ const UPLOAD = 0x08;
+
+ /// Hints allocator that memory will be used for short duration
+ /// allowing to use faster algorithm with less memory overhead.
+ /// If use holds returned memory block for too long then
+ /// effective memory overhead increases instead.
+ /// Best use case is for staging buffer for single batch of operations.
+ const TRANSIENT = 0x10;
+
+ /// Requests memory that can be addressed with `u64`.
+ /// Allows fetching device address for resources bound to that memory.
+ const DEVICE_ADDRESS = 0x20;
+ }
+}
+
/// Memory types selected for one combination of usage flags.
#[derive(Debug, Clone, Copy)]
struct MemoryForOneUsage {
    /// Bitmask with bit `i` set for every index stored in
    /// `types[..types_count]`.
    mask: u32,
    /// Memory type indices; only the first `types_count` entries are
    /// meaningful, the rest stay zero.
    types: [u32; 32],
    /// Number of meaningful entries in `types`.
    types_count: u32,
}
+
+pub(crate) struct MemoryForUsage {
+ usages: [MemoryForOneUsage; 64],
+}
+
+impl Debug for MemoryForUsage {
+ fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fmt.debug_struct("MemoryForUsage")
+ .field("usages", &&self.usages[..])
+ .finish()
+ }
+}
+
+impl MemoryForUsage {
+ pub fn new(memory_types: &[MemoryType]) -> Self {
+ assert!(
+ memory_types.len() <= 32,
+ "Only up to 32 memory types supported"
+ );
+
+ let mut mfu = MemoryForUsage {
+ usages: [MemoryForOneUsage {
+ mask: 0,
+ types: [0; 32],
+ types_count: 0,
+ }; 64],
+ };
+
+ for usage in 0..64 {
+ mfu.usages[usage as usize] =
+ one_usage(UsageFlags::from_bits_truncate(usage), memory_types);
+ }
+
+ mfu
+ }
+
+ /// Returns mask with bits set for memory type indices that support the
+ /// usage.
+ pub fn mask(&self, usage: UsageFlags) -> u32 {
+ self.usages[usage.bits() as usize].mask
+ }
+
+ /// Returns slice of memory type indices that support the usage.
+ /// Earlier memory type has priority over later.
+ pub fn types(&self, usage: UsageFlags) -> &[u32] {
+ let usage = &self.usages[usage.bits() as usize];
+ &usage.types[..usage.types_count as usize]
+ }
+}
+
+fn one_usage(usage: UsageFlags, memory_types: &[MemoryType]) -> MemoryForOneUsage {
+ let mut types = [0; 32];
+ let mut types_count = 0;
+
+ for (index, mt) in memory_types.iter().enumerate() {
+ if compatible(usage, mt.props) {
+ types[types_count as usize] = index as u32;
+ types_count += 1;
+ }
+ }
+
+ types[..types_count as usize]
+ .sort_unstable_by_key(|&index| reverse_priority(usage, memory_types[index as usize].props));
+
+ let mask = types[..types_count as usize]
+ .iter()
+ .fold(0u32, |mask, index| mask | 1u32 << index);
+
+ MemoryForOneUsage {
+ mask,
+ types,
+ types_count,
+ }
+}
+
+fn compatible(usage: UsageFlags, flags: MemoryPropertyFlags) -> bool {
+ type Flags = MemoryPropertyFlags;
+ if flags.contains(Flags::LAZILY_ALLOCATED) || flags.contains(Flags::PROTECTED) {
+ // Unsupported
+ false
+ } else if usage.intersects(UsageFlags::HOST_ACCESS | UsageFlags::UPLOAD | UsageFlags::DOWNLOAD)
+ {
+ // Requires HOST_VISIBLE
+ flags.contains(Flags::HOST_VISIBLE)
+ } else {
+ true
+ }
+}
+
+/// Returns reversed priority of memory with specified flags for specified usage.
+/// Lesser value returned = more prioritized.
+fn reverse_priority(usage: UsageFlags, flags: MemoryPropertyFlags) -> u32 {
+ type Flags = MemoryPropertyFlags;
+
+ // Highly prefer device local memory when `FAST_DEVICE_ACCESS` usage is specified
+ // or usage is empty.
+ let device_local: bool = flags.contains(Flags::DEVICE_LOCAL)
+ ^ (usage.is_empty() || usage.contains(UsageFlags::FAST_DEVICE_ACCESS));
+
+ assert!(
+ flags.contains(Flags::HOST_VISIBLE)
+ || !usage
+ .intersects(UsageFlags::HOST_ACCESS | UsageFlags::UPLOAD | UsageFlags::DOWNLOAD)
+ );
+
+ // Prefer non-host-visible memory when host access is not required.
+ let host_visible: bool = flags.contains(Flags::HOST_VISIBLE)
+ ^ usage.intersects(UsageFlags::HOST_ACCESS | UsageFlags::UPLOAD | UsageFlags::DOWNLOAD);
+
+ // Prefer cached memory for downloads.
+ // Or non-cached if downloads are not expected.
+ let host_cached: bool =
+ flags.contains(Flags::HOST_CACHED) ^ usage.contains(UsageFlags::DOWNLOAD);
+
+ // Prefer coherent for both uploads and downloads.
+ // Prefer non-coherent if neither flags is set.
+ let host_coherent: bool = flags.contains(Flags::HOST_COHERENT)
+ ^ (usage.intersects(UsageFlags::UPLOAD | UsageFlags::DOWNLOAD));
+
+ // Each boolean is false if flags are preferred.
+ device_local as u32 * 8
+ + host_visible as u32 * 4
+ + host_cached as u32 * 2
+ + host_coherent as u32
+}
diff --git a/third_party/rust/gpu-alloc/src/util.rs b/third_party/rust/gpu-alloc/src/util.rs new file mode 100644 index 0000000000..c22d30dbbd --- /dev/null +++ b/third_party/rust/gpu-alloc/src/util.rs @@ -0,0 +1,44 @@ +use alloc::sync::Arc;
+
/// Tells whether `arc` is the only strong reference to its value.
///
/// The answer guarantees uniqueness only if `Weak` pointers are never
/// created from this `Arc` or its clones; debug builds assert that.
pub(crate) fn is_arc_unique<M>(arc: &mut Arc<M>) -> bool {
    let owners = Arc::strong_count(&*arc);
    debug_assert_ne!(owners, 0, "This Arc should exist");

    // `Arc::get_mut` refuses a sole strong owner only when a `Weak` exists.
    debug_assert!(
        owners > 1 || Arc::get_mut(arc).is_some(),
        "`Weak` pointer exists"
    );

    owners == 1
}
+
+/// Can be used instead of `Arc::try_unwrap(arc).unwrap()`
+/// when it is guaranteed to succeed.
+pub(crate) unsafe fn arc_unwrap<M>(mut arc: Arc<M>) -> M {
+ use core::{mem::ManuallyDrop, ptr::read};
+ debug_assert!(is_arc_unique(&mut arc));
+
+ // Get raw pointer to inner value.
+ let raw = Arc::into_raw(arc);
+
+ // As `Arc` is unique and no Weak pointers exist
+ // it won't be dereferenced elsewhere.
+ let inner = read(raw);
+
+ // Cast to `ManuallyDrop` which guarantees to have same layout
+ // and will skip dropping.
+ drop(Arc::from_raw(raw as *const ManuallyDrop<M>));
+ inner
+}
+
+/// Can be used instead of `Arc::try_unwrap`
+/// only if `Weak` pointers are never created from this `Arc` or clones.
+pub(crate) unsafe fn try_arc_unwrap<M>(mut arc: Arc<M>) -> Option<M> {
+ if is_arc_unique(&mut arc) {
+ Some(arc_unwrap(arc))
+ } else {
+ None
+ }
+}
|