diff options
Diffstat (limited to '')
-rw-r--r-- | third_party/rust/gpu-alloc-types/.cargo-checksum.json | 1 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc-types/Cargo.toml | 26 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc-types/src/device.rs | 157 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc-types/src/lib.rs | 8 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc-types/src/types.rs | 54 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/.cargo-checksum.json | 1 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/Cargo.toml | 46 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/src/allocator.rs | 510 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/src/block.rs | 327 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/src/buddy.rs | 458 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/src/config.rs | 77 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/src/error.rs | 116 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/src/freelist.rs | 528 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/src/heap.rs | 32 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/src/lib.rs | 124 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/src/slab.rs | 97 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/src/usage.rs | 167 | ||||
-rw-r--r-- | third_party/rust/gpu-alloc/src/util.rs | 44 |
18 files changed, 2773 insertions, 0 deletions
diff --git a/third_party/rust/gpu-alloc-types/.cargo-checksum.json b/third_party/rust/gpu-alloc-types/.cargo-checksum.json new file mode 100644 index 0000000000..4ce7fc40c3 --- /dev/null +++ b/third_party/rust/gpu-alloc-types/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"5ec80402fb950b3e70b291c64a49275dd951221b71ee74f8be5a16a74d42d296","src/device.rs":"1f44d08d50fc59d55bd3f92308e4965bde9134e7f5daa03594e122f7cdb1dfa2","src/lib.rs":"0f77c21a5770b54affa146e5f4c15abcb83599de551226d336ee48ec5185f866","src/types.rs":"8c6acec203faa11856076193835f6d9a1924469682e1fdf11a0325936a1255ee"},"package":"54804d0d6bc9d7f26db4eaec1ad10def69b599315f487d32c334a80d1efe67a5"}
\ No newline at end of file diff --git a/third_party/rust/gpu-alloc-types/Cargo.toml b/third_party/rust/gpu-alloc-types/Cargo.toml new file mode 100644 index 0000000000..a4e0210d0b --- /dev/null +++ b/third_party/rust/gpu-alloc-types/Cargo.toml @@ -0,0 +1,26 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies +# +# If you believe there's an error in this file please file an +# issue against the rust-lang/cargo repository. If you're +# editing this file be aware that the upstream Cargo.toml +# will likely look very different (and much more reasonable) + +[package] +edition = "2018" +name = "gpu-alloc-types" +version = "0.2.0" +authors = ["Zakarum <zakarumych@ya.ru>"] +description = "Core types of gpu-alloc crate" +homepage = "https://github.com/zakarumych/gpu-alloc" +documentation = "https://docs.rs/gpu-alloc-types/0.2.0" +keywords = ["gpu", "vulkan", "allocation", "no-std"] +license = "MIT OR Apache-2.0" +repository = "https://github.com/zakarumych/gpu-alloc" +[dependencies.bitflags] +version = "1.2" +default-features = false diff --git a/third_party/rust/gpu-alloc-types/src/device.rs b/third_party/rust/gpu-alloc-types/src/device.rs new file mode 100644 index 0000000000..d05f2e8fde --- /dev/null +++ b/third_party/rust/gpu-alloc-types/src/device.rs @@ -0,0 +1,157 @@ +use {
+ crate::types::{MemoryHeap, MemoryType},
+ alloc::borrow::Cow,
+ core::ptr::NonNull,
+};
+
+/// Memory exhausted error.
+#[derive(Debug)]
+pub enum OutOfMemory {
+ /// Device memory exhausted.
+ OutOfDeviceMemory,
+
+ /// Host memory exhausted.
+ OutOfHostMemory,
+}
+
+/// Memory mapped error.
+#[derive(Debug)]
+pub enum DeviceMapError {
+ /// Device memory exhausted.
+ OutOfDeviceMemory,
+
+ /// Host memory exhausted.
+ OutOfHostMemory,
+
+ /// Map failed due to implementation specific error.
+ MapFailed,
+}
+
+/// Specifies range of the mapped memory region.
+#[derive(Debug)]
+pub struct MappedMemoryRange<'a, M> {
+ /// Memory object reference.
+ pub memory: &'a M,
+
+ /// Offset in bytes from start of the memory object.
+ pub offset: u64,
+
+ /// Size in bytes of the memory range.
+ pub size: u64,
+}
+
+/// Properties of the device that will be used for allocating memory objects.
+///
+/// See `gpu-alloc-<backend>` crate to learn how to obtain one for backend of choice.
+#[derive(Debug)]
+pub struct DeviceProperties<'a> {
+ /// Array of memory types provided by the device.
+ pub memory_types: Cow<'a, [MemoryType]>,
+
+ /// Array of memory heaps provided by the device.
+ pub memory_heaps: Cow<'a, [MemoryHeap]>,
+
+ /// Maximum number of valid memory allocations that can exist simultaneously within the device.
+ pub max_memory_allocation_count: u32,
+
+ /// Maximum size for single allocation supported by the device.
+ pub max_memory_allocation_size: u64,
+
+ /// Atom size for host mappable non-coherent memory.
+ pub non_coherent_atom_size: u64,
+
+ /// Specifies if feature required to fetch device address is enabled.
+ pub buffer_device_address: bool,
+}
+
+bitflags::bitflags! {
+ /// Allocation flags
+ #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+ pub struct AllocationFlags : u8 {
+ /// Specifies that the memory can be used for buffers created
+ /// with flag that allows fetching device address.
+ const DEVICE_ADDRESS = 0x1;
+ }
+}
+
+/// Abstract device that can be used to allocate memory objects.
+pub trait MemoryDevice<M> {
+ /// Allocates new memory object from device.
+ /// This function may be expensive and even limit maximum number of memory
+ /// objects allocated.
+ /// Which is the reason for sub-allocation this crate provides.
+ ///
+ /// # Safety
+ ///
+ /// `memory_type` must be valid index for memory type associated with this device.
+ /// Retreiving this information is implementation specific.
+ ///
+ /// `flags` must be supported by the device.
+ unsafe fn allocate_memory(
+ &self,
+ size: u64,
+ memory_type: u32,
+ flags: AllocationFlags,
+ ) -> Result<M, OutOfMemory>;
+
+ /// Deallocate memory object.
+ ///
+ /// # Safety
+ ///
+ /// Memory object must have been allocated from this device.\
+ /// All clones of specified memory handle must be dropped before calling this function.
+ unsafe fn deallocate_memory(&self, memory: M);
+
+ /// Map region of device memory to host memory space.
+ ///
+ /// # Safety
+ ///
+ /// * Memory object must have been allocated from this device.
+ /// * Memory object must not be already mapped.
+ /// * Memory must be allocated from type with `HOST_VISIBLE` property.
+ /// * `offset + size` must not overflow.
+ /// * `offset + size` must not be larger than memory object size specified when
+ /// memory object was allocated from this device.
+ unsafe fn map_memory(
+ &self,
+ memory: &mut M,
+ offset: u64,
+ size: u64,
+ ) -> Result<NonNull<u8>, DeviceMapError>;
+
+ /// Unmap previously mapped memory region.
+ ///
+ /// # Safety
+ ///
+ /// * Memory object must have been allocated from this device.
+ /// * Memory object must be mapped
+ unsafe fn unmap_memory(&self, memory: &mut M);
+
+ /// Invalidates ranges of memory mapped regions.
+ ///
+ /// # Safety
+ ///
+ /// * Memory objects must have been allocated from this device.
+ /// * `offset` and `size` in each element of `ranges` must specify
+ /// subregion of currently mapped memory region
+ /// * if `memory` in some element of `ranges` does not contain `HOST_COHERENT` property
+ /// then `offset` and `size` of that element must be multiple of `non_coherent_atom_size`.
+ unsafe fn invalidate_memory_ranges(
+ &self,
+ ranges: &[MappedMemoryRange<'_, M>],
+ ) -> Result<(), OutOfMemory>;
+
+ /// Flushes ranges of memory mapped regions.
+ ///
+ /// # Safety
+ ///
+ /// * Memory objects must have been allocated from this device.
+ /// * `offset` and `size` in each element of `ranges` must specify
+ /// subregion of currently mapped memory region
+ /// * if `memory` in some element of `ranges` does not contain `HOST_COHERENT` property
+ /// then `offset` and `size` of that element must be multiple of `non_coherent_atom_size`.
+ unsafe fn flush_memory_ranges(
+ &self,
+ ranges: &[MappedMemoryRange<'_, M>],
+ ) -> Result<(), OutOfMemory>;
+}
diff --git a/third_party/rust/gpu-alloc-types/src/lib.rs b/third_party/rust/gpu-alloc-types/src/lib.rs new file mode 100644 index 0000000000..708fa8714c --- /dev/null +++ b/third_party/rust/gpu-alloc-types/src/lib.rs @@ -0,0 +1,8 @@ +#![no_std]
+
+extern crate alloc;
+
+mod device;
+mod types;
+
+pub use self::{device::*, types::*};
diff --git a/third_party/rust/gpu-alloc-types/src/types.rs b/third_party/rust/gpu-alloc-types/src/types.rs new file mode 100644 index 0000000000..9c6fd3bd30 --- /dev/null +++ b/third_party/rust/gpu-alloc-types/src/types.rs @@ -0,0 +1,54 @@ +bitflags::bitflags! {
+ /// Memory properties type.
+ #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+ pub struct MemoryPropertyFlags: u8 {
+ /// This flag is set for device-local memory types.
+ /// Device-local memory is situated "close" to the GPU cores
+ /// and allows for fast access.
+ const DEVICE_LOCAL = 0x01;
+
+ /// This flag is set for host-visible memory types.
+ /// Host-visible memory can be mapped to the host memory range.
+ const HOST_VISIBLE = 0x02;
+
+ /// This flag is set for host-coherent memory types.
+ /// Host-coherent memory does not requires manual invalidation for
+ /// modifications on GPU to become visible on host;
+ /// nor flush for modification on host to become visible on GPU.
+ /// Access synchronization is still required.
+ const HOST_COHERENT = 0x04;
+
+ /// This flag is set for host-cached memory types.
+ /// Host-cached memory uses cache in host memory for faster reads from host.
+ const HOST_CACHED = 0x08;
+
+ /// This flag is set for lazily-allocated memory types.
+ /// Lazily-allocated memory must be used (and only) for transient image attachments.
+ const LAZILY_ALLOCATED = 0x10;
+
+ /// This flag is set for protected memory types.
+ /// Protected memory can be used for writing by protected operations
+ /// and can be read only by protected operations.
+ /// Protected memory cannot be host-visible.
+ /// Implementation must guarantee that there is no way for data to flow
+ /// from protected to unprotected memory.
+ const PROTECTED = 0x20;
+ }
+}
+
+/// Defines memory type.
+#[derive(Clone, Copy, Debug)]
+pub struct MemoryType {
+ /// Heap index of the memory type.
+ pub heap: u32,
+
+ /// Propety flags of the memory type.
+ pub props: MemoryPropertyFlags,
+}
+
+/// Defines memory heap.
+#[derive(Clone, Copy, Debug)]
+pub struct MemoryHeap {
+ /// Size of memory heap in bytes.
+ pub size: u64,
+}
diff --git a/third_party/rust/gpu-alloc/.cargo-checksum.json b/third_party/rust/gpu-alloc/.cargo-checksum.json new file mode 100644 index 0000000000..6f41510d58 --- /dev/null +++ b/third_party/rust/gpu-alloc/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"d444caa50cc5b4e0ee201da5347df73c6e77d3eedaae9e69d007c24dd0bb7bfa","src/allocator.rs":"c06b84e062859e1dcd21fa5cd71c8faafcca325b53dcc7b2fee3df945163e820","src/block.rs":"6efc369712e3d880a0d16ab1ef18062a88fefae9bf0a4e626de301cbb1b962ee","src/buddy.rs":"f8eacc183f2ee66faba05dec6f33659afa797e765326c96be4a8bafa651c0e47","src/config.rs":"49bb14c27713c6b1c0c91c094b0f00be22bd26efa660ff979fb1ee1193e9d70e","src/error.rs":"11337a66fc32f3423a1355ffd597d2c7ea28c2b7ce07d707fd5905c330edba08","src/freelist.rs":"3fbbcb96e307e0e021b38df6fe04d129e5d954fcd5a0144e388371442ef20916","src/heap.rs":"03f7c623a0389263073ab48797a191ca06c5611006df7b79c9dc8615c130f495","src/lib.rs":"78800455337d231af24814d84f4e5eb61c5181b12daec06fdcee2d2a75ccb221","src/slab.rs":"2df136b7e0a85eec8a198c7f807678d3a2043c46233e7ec63cd2de72fe562477","src/usage.rs":"0e4788c437718807cdd0b88301228cc27a6b8398313a91e1b70c5f2ab04dcbfb","src/util.rs":"31a496ee3407eb6b0dea255e30e154d805a0d08d777be9c06bdf56d1135bf950"},"package":"7fc59e5f710e310e76e6707f86c561dd646f69a8876da9131703b2f717de818d"}
\ No newline at end of file diff --git a/third_party/rust/gpu-alloc/Cargo.toml b/third_party/rust/gpu-alloc/Cargo.toml new file mode 100644 index 0000000000..51c65b43a3 --- /dev/null +++ b/third_party/rust/gpu-alloc/Cargo.toml @@ -0,0 +1,46 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2018" +name = "gpu-alloc" +version = "0.5.3" +authors = ["Zakarum <zakarumych@ya.ru>"] +description = "Implementation agnostic memory allocator for Vulkan like APIs" +homepage = "https://github.com/zakarumych/gpu-alloc" +documentation = "https://docs.rs/gpu-alloc-types" +readme = "../README.md" +keywords = ["gpu", "vulkan", "allocation", "no-std"] +categories = ["graphics", "memory-management", "no-std", "game-development"] +license = "MIT OR Apache-2.0" +repository = "https://github.com/zakarumych/gpu-alloc" +[dependencies.bitflags] +version = "1.2" +default-features = false + +[dependencies.gpu-alloc-types] +version = "0.2" + +[dependencies.serde] +version = "1.0" +features = ["derive"] +optional = true +default-features = false + +[dependencies.tracing] +version = "0.1.27" +features = ["attributes"] +optional = true +default-features = false + +[features] +default = ["std"] +std = [] diff --git a/third_party/rust/gpu-alloc/src/allocator.rs b/third_party/rust/gpu-alloc/src/allocator.rs new file mode 100644 index 0000000000..80937c8d06 --- /dev/null +++ b/third_party/rust/gpu-alloc/src/allocator.rs @@ -0,0 +1,510 @@ +use { + crate::{ + align_down, + block::{MemoryBlock, MemoryBlockFlavor}, + buddy::{BuddyAllocator, BuddyBlock}, + config::Config, + error::AllocationError, + freelist::{FreeListAllocator, FreeListBlock}, + heap::Heap, + usage::{MemoryForUsage, UsageFlags}, + MemoryBounds, Request, + }, + alloc::boxed::Box, + core::convert::TryFrom as _, + gpu_alloc_types::{ + AllocationFlags, DeviceProperties, MemoryDevice, MemoryPropertyFlags, MemoryType, + OutOfMemory, + }, +}; + +/// Memory allocator for Vulkan-like APIs. +#[derive(Debug)] +pub struct GpuAllocator<M> { + dedicated_threshold: u64, + preferred_dedicated_threshold: u64, + transient_dedicated_threshold: u64, + max_memory_allocation_size: u64, + memory_for_usage: MemoryForUsage, + memory_types: Box<[MemoryType]>, + memory_heaps: Box<[Heap]>, + allocations_remains: u32, + non_coherent_atom_mask: u64, + starting_free_list_chunk: u64, + final_free_list_chunk: u64, + minimal_buddy_size: u64, + initial_buddy_dedicated_size: u64, + buffer_device_address: bool, + + buddy_allocators: Box<[Option<BuddyAllocator<M>>]>, + freelist_allocators: Box<[Option<FreeListAllocator<M>>]>, +} + +/// Hints for allocator to decide on allocation strategy. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum Dedicated { + /// Allocation directly from device.\ + /// Very slow. + /// Count of allocations is limited.\ + /// Use with caution.\ + /// Must be used if resource has to be bound to dedicated memory object. + Required, + + /// Hint for allocator that dedicated memory object is preferred.\ + /// Should be used if it is known that resource placed in dedicated memory object + /// would allow for better performance.\ + /// Implementation is allowed to return block to shared memory object. + Preferred, +} + +impl<M> GpuAllocator<M> +where + M: MemoryBounds + 'static, +{ + /// Creates new instance of `GpuAllocator`. + /// Provided `DeviceProperties` should match properties of `MemoryDevice` that will be used + /// with created `GpuAllocator` instance. + #[cfg_attr(feature = "tracing", tracing::instrument)] + pub fn new(config: Config, props: DeviceProperties<'_>) -> Self { + assert!( + props.non_coherent_atom_size.is_power_of_two(), + "`non_coherent_atom_size` must be power of two" + ); + + assert!( + isize::try_from(props.non_coherent_atom_size).is_ok(), + "`non_coherent_atom_size` must fit host address space" + ); + + GpuAllocator { + dedicated_threshold: config.dedicated_threshold, + preferred_dedicated_threshold: config + .preferred_dedicated_threshold + .min(config.dedicated_threshold), + + transient_dedicated_threshold: config + .transient_dedicated_threshold + .max(config.dedicated_threshold), + + max_memory_allocation_size: props.max_memory_allocation_size, + + memory_for_usage: MemoryForUsage::new(props.memory_types.as_ref()), + + memory_types: props.memory_types.as_ref().iter().copied().collect(), + memory_heaps: props + .memory_heaps + .as_ref() + .iter() + .map(|heap| Heap::new(heap.size)) + .collect(), + + buffer_device_address: props.buffer_device_address, + + allocations_remains: props.max_memory_allocation_count, + non_coherent_atom_mask: props.non_coherent_atom_size - 1, + + starting_free_list_chunk: config.starting_free_list_chunk, + final_free_list_chunk: config.final_free_list_chunk, + minimal_buddy_size: config.minimal_buddy_size, + initial_buddy_dedicated_size: config.initial_buddy_dedicated_size, + + buddy_allocators: props.memory_types.as_ref().iter().map(|_| None).collect(), + freelist_allocators: props.memory_types.as_ref().iter().map(|_| None).collect(), + } + } + + /// Allocates memory block from specified `device` according to the `request`. + /// + /// # Safety + /// + /// * `device` must be one with `DeviceProperties` that were provided to create this `GpuAllocator` instance. + /// * Same `device` instance must be used for all interactions with one `GpuAllocator` instance + /// and memory blocks allocated from it. + #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))] + pub unsafe fn alloc( + &mut self, + device: &impl MemoryDevice<M>, + request: Request, + ) -> Result<MemoryBlock<M>, AllocationError> { + self.alloc_internal(device, request, None) + } + + /// Allocates memory block from specified `device` according to the `request`. + /// This function allows user to force specific allocation strategy. + /// Improper use can lead to suboptimal performance or too large overhead. + /// Prefer `GpuAllocator::alloc` if doubt. + /// + /// # Safety + /// + /// * `device` must be one with `DeviceProperties` that were provided to create this `GpuAllocator` instance. + /// * Same `device` instance must be used for all interactions with one `GpuAllocator` instance + /// and memory blocks allocated from it. + #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))] + pub unsafe fn alloc_with_dedicated( + &mut self, + device: &impl MemoryDevice<M>, + request: Request, + dedicated: Dedicated, + ) -> Result<MemoryBlock<M>, AllocationError> { + self.alloc_internal(device, request, Some(dedicated)) + } + + unsafe fn alloc_internal( + &mut self, + device: &impl MemoryDevice<M>, + mut request: Request, + dedicated: Option<Dedicated>, + ) -> Result<MemoryBlock<M>, AllocationError> { + enum Strategy { + Buddy, + Dedicated, + FreeList, + } + + request.usage = with_implicit_usage_flags(request.usage); + + if request.usage.contains(UsageFlags::DEVICE_ADDRESS) { + assert!(self.buffer_device_address, "`DEVICE_ADDRESS` cannot be requested when `DeviceProperties::buffer_device_address` is false"); + } + + if request.size > self.max_memory_allocation_size { + return Err(AllocationError::OutOfDeviceMemory); + } + + if let Some(Dedicated::Required) = dedicated { + if self.allocations_remains == 0 { + return Err(AllocationError::TooManyObjects); + } + } + + if 0 == self.memory_for_usage.mask(request.usage) & request.memory_types { + #[cfg(feature = "tracing")] + tracing::error!( + "Cannot serve request {:?}, no memory among bitset `{}` support usage {:?}", + request, + request.memory_types, + request.usage + ); + + return Err(AllocationError::NoCompatibleMemoryTypes); + } + + let transient = request.usage.contains(UsageFlags::TRANSIENT); + + for &index in self.memory_for_usage.types(request.usage) { + if 0 == request.memory_types & (1 << index) { + // Skip memory type incompatible with the request. + continue; + } + + let memory_type = &self.memory_types[index as usize]; + let heap = memory_type.heap; + let heap = &mut self.memory_heaps[heap as usize]; + + let atom_mask = if host_visible_non_coherent(memory_type.props) { + self.non_coherent_atom_mask + } else { + 0 + }; + + let flags = if self.buffer_device_address { + AllocationFlags::DEVICE_ADDRESS + } else { + AllocationFlags::empty() + }; + + let strategy = match (dedicated, transient) { + (Some(Dedicated::Required), _) => Strategy::Dedicated, + (Some(Dedicated::Preferred), _) + if request.size >= self.preferred_dedicated_threshold => + { + Strategy::Dedicated + } + (_, true) => { + let threshold = self.transient_dedicated_threshold.min(heap.size() / 32); + + if request.size < threshold { + Strategy::FreeList + } else { + Strategy::Dedicated + } + } + (_, false) => { + let threshold = self.dedicated_threshold.min(heap.size() / 32); + + if request.size < threshold { + Strategy::Buddy + } else { + Strategy::Dedicated + } + } + }; + + match strategy { + Strategy::Dedicated => { + #[cfg(feature = "tracing")] + tracing::debug!( + "Allocating memory object `{}@{:?}`", + request.size, + memory_type + ); + + match device.allocate_memory(request.size, index, flags) { + Ok(memory) => { + self.allocations_remains -= 1; + heap.alloc(request.size); + + return Ok(MemoryBlock::new( + index, + memory_type.props, + 0, + request.size, + atom_mask, + MemoryBlockFlavor::Dedicated { memory }, + )); + } + Err(OutOfMemory::OutOfDeviceMemory) => continue, + Err(OutOfMemory::OutOfHostMemory) => { + return Err(AllocationError::OutOfHostMemory) + } + } + } + Strategy::FreeList => { + let allocator = match &mut self.freelist_allocators[index as usize] { + Some(allocator) => allocator, + slot => { + let starting_free_list_chunk = match align_down( + self.starting_free_list_chunk.min(heap.size() / 32), + atom_mask, + ) { + 0 => atom_mask, + other => other, + }; + + let final_free_list_chunk = match align_down( + self.final_free_list_chunk + .max(self.starting_free_list_chunk) + .max(self.transient_dedicated_threshold) + .min(heap.size() / 32), + atom_mask, + ) { + 0 => atom_mask, + other => other, + }; + + slot.get_or_insert(FreeListAllocator::new( + starting_free_list_chunk, + final_free_list_chunk, + index, + memory_type.props, + if host_visible_non_coherent(memory_type.props) { + self.non_coherent_atom_mask + } else { + 0 + }, + )) + } + }; + let result = allocator.alloc( + device, + request.size, + request.align_mask, + flags, + heap, + &mut self.allocations_remains, + ); + + match result { + Ok(block) => { + return Ok(MemoryBlock::new( + index, + memory_type.props, + block.offset, + block.size, + atom_mask, + MemoryBlockFlavor::FreeList { + chunk: block.chunk, + ptr: block.ptr, + memory: block.memory, + }, + )) + } + Err(AllocationError::OutOfDeviceMemory) => continue, + Err(err) => return Err(err), + } + } + + Strategy::Buddy => { + let allocator = match &mut self.buddy_allocators[index as usize] { + Some(allocator) => allocator, + slot => { + let minimal_buddy_size = self + .minimal_buddy_size + .min(heap.size() / 1024) + .next_power_of_two(); + + let initial_buddy_dedicated_size = self + .initial_buddy_dedicated_size + .min(heap.size() / 32) + .next_power_of_two(); + + slot.get_or_insert(BuddyAllocator::new( + minimal_buddy_size, + initial_buddy_dedicated_size, + index, + memory_type.props, + if host_visible_non_coherent(memory_type.props) { + self.non_coherent_atom_mask + } else { + 0 + }, + )) + } + }; + let result = allocator.alloc( + device, + request.size, + request.align_mask, + flags, + heap, + &mut self.allocations_remains, + ); + + match result { + Ok(block) => { + return Ok(MemoryBlock::new( + index, + memory_type.props, + block.offset, + block.size, + atom_mask, + MemoryBlockFlavor::Buddy { + chunk: block.chunk, + ptr: block.ptr, + index: block.index, + memory: block.memory, + }, + )) + } + Err(AllocationError::OutOfDeviceMemory) => continue, + Err(err) => return Err(err), + } + } + } + } + + Err(AllocationError::OutOfDeviceMemory) + } + + /// Deallocates memory block previously allocated from this `GpuAllocator` instance. + /// + /// # Safety + /// + /// * Memory block must have been allocated by this `GpuAllocator` instance + /// * `device` must be one with `DeviceProperties` that were provided to create this `GpuAllocator` instance + /// * Same `device` instance must be used for all interactions with one `GpuAllocator` instance + /// and memory blocks allocated from it + #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))] + pub unsafe fn dealloc(&mut self, device: &impl MemoryDevice<M>, block: MemoryBlock<M>) { + let memory_type = block.memory_type(); + let offset = block.offset(); + let size = block.size(); + let flavor = block.deallocate(); + match flavor { + MemoryBlockFlavor::Dedicated { memory } => { + let heap = self.memory_types[memory_type as usize].heap; + device.deallocate_memory(memory); + self.allocations_remains += 1; + self.memory_heaps[heap as usize].dealloc(size); + } + MemoryBlockFlavor::Buddy { + chunk, + ptr, + index, + memory, + } => { + let heap = self.memory_types[memory_type as usize].heap; + let heap = &mut self.memory_heaps[heap as usize]; + + let allocator = self.buddy_allocators[memory_type as usize] + .as_mut() + .expect("Allocator should exist"); + + allocator.dealloc( + device, + BuddyBlock { + memory, + ptr, + offset, + size, + chunk, + index, + }, + heap, + &mut self.allocations_remains, + ); + } + MemoryBlockFlavor::FreeList { chunk, ptr, memory } => { + let heap = self.memory_types[memory_type as usize].heap; + let heap = &mut self.memory_heaps[heap as usize]; + + let allocator = self.freelist_allocators[memory_type as usize] + .as_mut() + .expect("Allocator should exist"); + + allocator.dealloc( + device, + FreeListBlock { + memory, + ptr, + chunk, + offset, + size, + }, + heap, + &mut self.allocations_remains, + ); + } + } + } + + /// Deallocates leftover memory objects. + /// Should be used before dropping. + /// + /// # Safety + /// + /// * `device` must be one with `DeviceProperties` that were provided to create this `GpuAllocator` instance + /// * Same `device` instance must be used for all interactions with one `GpuAllocator` instance + /// and memory blocks allocated from it + #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))] + pub unsafe fn cleanup(&mut self, device: &impl MemoryDevice<M>) { + for (index, allocator) in self + .freelist_allocators + .iter_mut() + .enumerate() + .filter_map(|(index, allocator)| Some((index, allocator.as_mut()?))) + { + let memory_type = &self.memory_types[index]; + let heap = memory_type.heap; + let heap = &mut self.memory_heaps[heap as usize]; + + allocator.cleanup(device, heap, &mut self.allocations_remains); + } + } +} + +fn host_visible_non_coherent(props: MemoryPropertyFlags) -> bool { + (props & (MemoryPropertyFlags::HOST_COHERENT | MemoryPropertyFlags::HOST_VISIBLE)) + == MemoryPropertyFlags::HOST_VISIBLE +} + +fn with_implicit_usage_flags(usage: UsageFlags) -> UsageFlags { + if usage.is_empty() { + UsageFlags::FAST_DEVICE_ACCESS + } else if usage.intersects(UsageFlags::DOWNLOAD | UsageFlags::UPLOAD) { + usage | UsageFlags::HOST_ACCESS + } else { + usage + } +} diff --git a/third_party/rust/gpu-alloc/src/block.rs b/third_party/rust/gpu-alloc/src/block.rs new file mode 100644 index 0000000000..7cac633bf1 --- /dev/null +++ b/third_party/rust/gpu-alloc/src/block.rs @@ -0,0 +1,327 @@ +use { + crate::{align_down, align_up, error::MapError}, + alloc::sync::Arc, + core::{ + convert::TryFrom as _, + ptr::{copy_nonoverlapping, NonNull}, + // sync::atomic::{AtomicU8, Ordering::*}, + }, + gpu_alloc_types::{MappedMemoryRange, MemoryDevice, MemoryPropertyFlags}, +}; + +#[derive(Debug)] +struct Relevant; + +impl Drop for Relevant { + fn drop(&mut self) { + report_error_on_drop!("Memory block wasn't deallocated"); + } +} + +/// Memory block allocated by `GpuAllocator`. +#[derive(Debug)] +pub struct MemoryBlock<M> { + memory_type: u32, + props: MemoryPropertyFlags, + offset: u64, + size: u64, + atom_mask: u64, + mapped: bool, + flavor: MemoryBlockFlavor<M>, + relevant: Relevant, +} + +impl<M> MemoryBlock<M> { + pub(crate) fn new( + memory_type: u32, + props: MemoryPropertyFlags, + offset: u64, + size: u64, + atom_mask: u64, + flavor: MemoryBlockFlavor<M>, + ) -> Self { + isize::try_from(atom_mask).expect("`atom_mask` is too large"); + MemoryBlock { + memory_type, + props, + offset, + size, + atom_mask, + flavor, + mapped: false, + relevant: Relevant, + } + } + + pub(crate) fn deallocate(self) -> MemoryBlockFlavor<M> { + core::mem::forget(self.relevant); + self.flavor + } +} + +unsafe impl<M> Sync for MemoryBlock<M> where M: Sync {} +unsafe impl<M> Send for MemoryBlock<M> where M: Send {} + +#[derive(Debug)] +pub(crate) enum MemoryBlockFlavor<M> { + Dedicated { + memory: M, + }, + Buddy { + chunk: usize, + index: usize, + ptr: Option<NonNull<u8>>, + memory: Arc<M>, + }, + FreeList { + chunk: u64, + ptr: Option<NonNull<u8>>, + memory: Arc<M>, + }, +} + +impl<M> MemoryBlock<M> { + /// Returns reference to parent memory object. + #[inline(always)] + pub fn memory(&self) -> &M { + match &self.flavor { + MemoryBlockFlavor::Dedicated { memory } => memory, + MemoryBlockFlavor::Buddy { memory, .. } => &**memory, + MemoryBlockFlavor::FreeList { memory, .. } => &**memory, + } + } + + /// Returns offset in bytes from start of memory object to start of this block. + #[inline(always)] + pub fn offset(&self) -> u64 { + self.offset + } + + /// Returns size of this memory block. + #[inline(always)] + pub fn size(&self) -> u64 { + self.size + } + + /// Returns memory property flags for parent memory object. + #[inline(always)] + pub fn props(&self) -> MemoryPropertyFlags { + self.props + } + + /// Returns index of type of parent memory object. + #[inline(always)] + pub fn memory_type(&self) -> u32 { + self.memory_type + } + + /// Returns pointer to mapped memory range of this block. + /// This blocks becomes mapped. + /// + /// The user of returned pointer must guarantee that any previously submitted command that writes to this range has completed + /// before the host reads from or writes to that range, + /// and that any previously submitted command that reads from that range has completed + /// before the host writes to that region. + /// If the device memory was allocated without the `HOST_COHERENT` property flag set, + /// these guarantees must be made for an extended range: + /// the user must round down the start of the range to the nearest multiple of `non_coherent_atom_size`, + /// and round the end of the range up to the nearest multiple of `non_coherent_atom_size`. + /// + /// # Panics + /// + /// This function panics if block is currently mapped. + /// + /// # Safety + /// + /// `block` must have been allocated from specified `device`. + #[inline(always)] + pub unsafe fn map( + &mut self, + device: &impl MemoryDevice<M>, + offset: u64, + size: usize, + ) -> Result<NonNull<u8>, MapError> { + let size_u64 = u64::try_from(size).expect("`size` doesn't fit device address space"); + assert!(offset < self.size, "`offset` is out of memory block bounds"); + assert!( + size_u64 <= self.size - offset, + "`offset + size` is out of memory block bounds" + ); + + let ptr = match &mut self.flavor { + MemoryBlockFlavor::Dedicated { memory } => { + let end = align_up(offset + size_u64, self.atom_mask) + .expect("mapping end doesn't fit device address space"); + let aligned_offset = align_down(offset, self.atom_mask); + + if !acquire_mapping(&mut self.mapped) { + return Err(MapError::AlreadyMapped); + } + let result = + device.map_memory(memory, self.offset + aligned_offset, end - aligned_offset); + + match result { + // the overflow is checked in `Self::new()` + Ok(ptr) => { + let ptr_offset = (offset - aligned_offset) as isize; + ptr.as_ptr().offset(ptr_offset) + } + Err(err) => { + release_mapping(&mut self.mapped); + return Err(err.into()); + } + } + } + MemoryBlockFlavor::FreeList { ptr: Some(ptr), .. } + | MemoryBlockFlavor::Buddy { ptr: Some(ptr), .. } => { + if !acquire_mapping(&mut self.mapped) { + return Err(MapError::AlreadyMapped); + } + let offset_isize = isize::try_from(offset) + .expect("Buddy and linear block should fit host address space"); + ptr.as_ptr().offset(offset_isize) + } + _ => return Err(MapError::NonHostVisible), + }; + + Ok(NonNull::new_unchecked(ptr)) + } + + /// Unmaps memory range of this block that was previously mapped with `Block::map`. + /// This block becomes unmapped. + /// + /// # Panics + /// + /// This function panics if this block is not currently mapped. + /// + /// # Safety + /// + /// `block` must have been allocated from specified `device`. + #[inline(always)] + pub unsafe fn unmap(&mut self, device: &impl MemoryDevice<M>) -> bool { + if !release_mapping(&mut self.mapped) { + return false; + } + match &mut self.flavor { + MemoryBlockFlavor::Dedicated { memory } => { + device.unmap_memory(memory); + } + MemoryBlockFlavor::Buddy { .. } => {} + MemoryBlockFlavor::FreeList { .. } => {} + } + true + } + + /// Transiently maps block memory range and copies specified data + /// to the mapped memory range. + /// + /// # Panics + /// + /// This function panics if block is currently mapped. + /// + /// # Safety + /// + /// `block` must have been allocated from specified `device`. + /// The caller must guarantee that any previously submitted command that reads or writes to this range has completed. + #[inline(always)] + pub unsafe fn write_bytes( + &mut self, + device: &impl MemoryDevice<M>, + offset: u64, + data: &[u8], + ) -> Result<(), MapError> { + let size = data.len(); + let ptr = self.map(device, offset, size)?; + + copy_nonoverlapping(data.as_ptr(), ptr.as_ptr(), size); + let result = if !self.coherent() { + let aligned_offset = align_down(offset, self.atom_mask); + let end = align_up(offset + data.len() as u64, self.atom_mask).unwrap(); + + device.flush_memory_ranges(&[MappedMemoryRange { + memory: self.memory(), + offset: self.offset + aligned_offset, + size: end - aligned_offset, + }]) + } else { + Ok(()) + }; + + self.unmap(device); + result.map_err(Into::into) + } + + /// Transiently maps block memory range and copies specified data + /// from the mapped memory range. + /// + /// # Panics + /// + /// This function panics if block is currently mapped. + /// + /// # Safety + /// + /// `block` must have been allocated from specified `device`. + /// The caller must guarantee that any previously submitted command that reads to this range has completed. + #[inline(always)] + pub unsafe fn read_bytes( + &mut self, + device: &impl MemoryDevice<M>, + offset: u64, + data: &mut [u8], + ) -> Result<(), MapError> { + #[cfg(feature = "tracing")] + { + if !self.cached() { + tracing::warn!("Reading from non-cached memory may be slow. Consider allocating HOST_CACHED memory block for host reads.") + } + } + + let size = data.len(); + let ptr = self.map(device, offset, size)?; + let result = if !self.coherent() { + let aligned_offset = align_down(offset, self.atom_mask); + let end = align_up(offset + data.len() as u64, self.atom_mask).unwrap(); + + device.invalidate_memory_ranges(&[MappedMemoryRange { + memory: self.memory(), + offset: self.offset + aligned_offset, + size: end - aligned_offset, + }]) + } else { + Ok(()) + }; + if result.is_ok() { + copy_nonoverlapping(ptr.as_ptr(), data.as_mut_ptr(), size); + } + + self.unmap(device); + result.map_err(Into::into) + } + + fn coherent(&self) -> bool { + self.props.contains(MemoryPropertyFlags::HOST_COHERENT) + } + + #[cfg(feature = "tracing")] + fn cached(&self) -> bool { + self.props.contains(MemoryPropertyFlags::HOST_CACHED) + } +} + +fn acquire_mapping(mapped: &mut bool) -> bool { + if *mapped { + false + } else { + *mapped = true; + true + } +} + +fn release_mapping(mapped: &mut bool) -> bool { + if *mapped { + *mapped = false; + true + } else { + false + } +} diff --git a/third_party/rust/gpu-alloc/src/buddy.rs b/third_party/rust/gpu-alloc/src/buddy.rs new file mode 100644 index 0000000000..5691f4d96d --- /dev/null +++ b/third_party/rust/gpu-alloc/src/buddy.rs @@ -0,0 +1,458 @@ +use { + crate::{ + align_up, error::AllocationError, heap::Heap, slab::Slab, unreachable_unchecked, + util::try_arc_unwrap, MemoryBounds, + }, + alloc::{sync::Arc, vec::Vec}, + core::{convert::TryFrom as _, mem::replace, ptr::NonNull}, + gpu_alloc_types::{AllocationFlags, DeviceMapError, MemoryDevice, MemoryPropertyFlags}, +}; + +#[derive(Debug)] +pub(crate) struct BuddyBlock<M> { + pub memory: Arc<M>, + pub ptr: Option<NonNull<u8>>, + pub offset: u64, + pub size: u64, + pub chunk: usize, + pub index: usize, +} + +unsafe impl<M> Sync for BuddyBlock<M> where M: Sync {} +unsafe impl<M> Send for BuddyBlock<M> where M: Send {} + +#[derive(Clone, Copy, Debug)] +enum PairState { + Exhausted, + Ready { + ready: Side, + next: usize, + prev: usize, + }, +} + +impl PairState { + unsafe fn replace_next(&mut self, value: usize) -> usize { + match self { + PairState::Exhausted => unreachable_unchecked(), + PairState::Ready { next, .. } => replace(next, value), + } + } + + unsafe fn replace_prev(&mut self, value: usize) -> usize { + match self { + PairState::Exhausted => unreachable_unchecked(), + PairState::Ready { prev, .. } => replace(prev, value), + } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum Side { + Left, + Right, +} +use Side::*; + +#[derive(Debug)] +struct PairEntry { + state: PairState, + chunk: usize, + offset: u64, + parent: Option<usize>, +} + +struct SizeBlockEntry { + chunk: usize, + offset: u64, + index: usize, +} + +#[derive(Debug)] +struct Size { + next_ready: usize, + pairs: Slab<PairEntry>, +} +#[derive(Debug)] +enum Release { + None, + Parent(usize), + Chunk(usize), +} + +impl Size { + fn new() -> Self { + Size { + pairs: Slab::new(), + next_ready: 0, + } + } + + unsafe fn add_pair_and_acquire_left( + &mut self, + chunk: usize, + offset: u64, + parent: Option<usize>, + ) -> SizeBlockEntry { + if self.next_ready < self.pairs.len() { + unreachable_unchecked() + } + + let index = self.pairs.insert(PairEntry { + state: PairState::Exhausted, + chunk, + offset, + parent, + }); + + let entry = self.pairs.get_unchecked_mut(index); + entry.state = PairState::Ready { + next: index, + prev: index, + ready: Right, // Left is allocated. + }; + self.next_ready = index; + + SizeBlockEntry { + chunk, + offset, + index: index << 1, + } + } + + fn acquire(&mut self, size: u64) -> Option<SizeBlockEntry> { + if self.next_ready >= self.pairs.len() { + return None; + } + + let ready = self.next_ready; + + let entry = unsafe { self.pairs.get_unchecked_mut(ready) }; + let chunk = entry.chunk; + let offset = entry.offset; + + let bit = match entry.state { + PairState::Exhausted => unsafe { unreachable_unchecked() }, + PairState::Ready { ready, next, prev } => { + entry.state = PairState::Exhausted; + + if prev == self.next_ready { + // The only ready entry. + debug_assert_eq!(next, self.next_ready); + self.next_ready = self.pairs.len(); + } else { + let prev_entry = unsafe { self.pairs.get_unchecked_mut(prev) }; + let prev_next = unsafe { prev_entry.state.replace_next(next) }; + debug_assert_eq!(prev_next, self.next_ready); + + let next_entry = unsafe { self.pairs.get_unchecked_mut(next) }; + let next_prev = unsafe { next_entry.state.replace_prev(prev) }; + debug_assert_eq!(next_prev, self.next_ready); + + self.next_ready = next; + } + + match ready { + Left => 0, + Right => 1, + } + } + }; + + Some(SizeBlockEntry { + chunk, + offset: offset + bit as u64 * size, + index: (ready << 1) | bit as usize, + }) + } + + fn release(&mut self, index: usize) -> Release { + let side = match index & 1 { + 0 => Side::Left, + 1 => Side::Right, + _ => unsafe { unreachable_unchecked() }, + }; + let entry_index = index >> 1; + + let len = self.pairs.len(); + + let entry = self.pairs.get_mut(entry_index); + + let chunk = entry.chunk; + let offset = entry.offset; + let parent = entry.parent; + + match entry.state { + PairState::Exhausted => { + if self.next_ready == len { + entry.state = PairState::Ready { + ready: side, + next: entry_index, + prev: entry_index, + }; + self.next_ready = entry_index; + } else { + debug_assert!(self.next_ready < len); + + let next = self.next_ready; + let next_entry = unsafe { self.pairs.get_unchecked_mut(next) }; + let prev = unsafe { next_entry.state.replace_prev(entry_index) }; + + let prev_entry = unsafe { self.pairs.get_unchecked_mut(prev) }; + let prev_next = unsafe { prev_entry.state.replace_next(entry_index) }; + debug_assert_eq!(prev_next, next); + + let entry = unsafe { self.pairs.get_unchecked_mut(entry_index) }; + entry.state = PairState::Ready { + ready: side, + next, + prev, + }; + } + Release::None + } + + PairState::Ready { ready, .. } if ready == side => { + panic!("Attempt to dealloate already free block") + } + + PairState::Ready { next, prev, .. } => { + unsafe { + self.pairs.remove_unchecked(entry_index); + } + + if prev == entry_index { + debug_assert_eq!(next, entry_index); + self.next_ready = self.pairs.len(); + } else { + let prev_entry = unsafe { self.pairs.get_unchecked_mut(prev) }; + let prev_next = unsafe { prev_entry.state.replace_next(next) }; + debug_assert_eq!(prev_next, entry_index); + + let next_entry = unsafe { self.pairs.get_unchecked_mut(next) }; + let next_prev = unsafe { next_entry.state.replace_prev(prev) }; + debug_assert_eq!(next_prev, entry_index); + + self.next_ready = next; + } + + match parent { + Some(parent) => Release::Parent(parent), + None => { + debug_assert_eq!(offset, 0); + Release::Chunk(chunk) + } + } + } + } + } +} + +#[derive(Debug)] +struct Chunk<M> { + memory: Arc<M>, + ptr: Option<NonNull<u8>>, + size: u64, +} + +#[derive(Debug)] +pub(crate) struct BuddyAllocator<M> { + minimal_size: u64, + chunks: Slab<Chunk<M>>, + sizes: Vec<Size>, + memory_type: u32, + props: MemoryPropertyFlags, + atom_mask: u64, +} + +unsafe impl<M> Sync for BuddyAllocator<M> where M: Sync {} +unsafe impl<M> Send for BuddyAllocator<M> where M: Send {} + +impl<M> BuddyAllocator<M> +where + M: MemoryBounds + 'static, +{ + pub fn new( + minimal_size: u64, + initial_dedicated_size: u64, + memory_type: u32, + props: MemoryPropertyFlags, + atom_mask: u64, + ) -> Self { + assert!( + minimal_size.is_power_of_two(), + "Minimal allocation size of buddy allocator must be power of two" + ); + assert!( + initial_dedicated_size.is_power_of_two(), + "Dedicated allocation size of buddy allocator must be power of two" + ); + + let initial_sizes = (initial_dedicated_size + .trailing_zeros() + .saturating_sub(minimal_size.trailing_zeros())) as usize; + + BuddyAllocator { + minimal_size, + chunks: Slab::new(), + sizes: (0..initial_sizes).map(|_| Size::new()).collect(), + memory_type, + props, + atom_mask: atom_mask | (minimal_size - 1), + } + } + + #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))] + pub unsafe fn alloc( + &mut self, + device: &impl MemoryDevice<M>, + size: u64, + align_mask: u64, + flags: AllocationFlags, + heap: &mut Heap, + allocations_remains: &mut u32, + ) -> Result<BuddyBlock<M>, AllocationError> { + let align_mask = align_mask | self.atom_mask; + + let size = align_up(size, align_mask) + .and_then(|size| size.checked_next_power_of_two()) + .ok_or(AllocationError::OutOfDeviceMemory)?; + + let size_index = size.trailing_zeros() - self.minimal_size.trailing_zeros(); + let size_index = + usize::try_from(size_index).map_err(|_| AllocationError::OutOfDeviceMemory)?; + + while self.sizes.len() <= size_index { + self.sizes.push(Size::new()); + } + + let host_visible = self.host_visible(); + + let mut candidate_size_index = size_index; + + let (mut entry, entry_size_index) = loop { + let sizes_len = self.sizes.len(); + + let candidate_size_entry = &mut self.sizes[candidate_size_index]; + let candidate_size = self.minimal_size << candidate_size_index; + + if let Some(entry) = candidate_size_entry.acquire(candidate_size) { + break (entry, candidate_size_index); + } + + if sizes_len == candidate_size_index + 1 { + // That's size of device allocation. + if *allocations_remains == 0 { + return Err(AllocationError::TooManyObjects); + } + + let chunk_size = self.minimal_size << (candidate_size_index + 1); + let mut memory = device.allocate_memory(chunk_size, self.memory_type, flags)?; + *allocations_remains -= 1; + heap.alloc(chunk_size); + + let ptr = if host_visible { + match device.map_memory(&mut memory, 0, chunk_size) { + Ok(ptr) => Some(ptr), + Err(DeviceMapError::OutOfDeviceMemory) => { + return Err(AllocationError::OutOfDeviceMemory) + } + Err(DeviceMapError::MapFailed) | Err(DeviceMapError::OutOfHostMemory) => { + return Err(AllocationError::OutOfHostMemory) + } + } + } else { + None + }; + + let chunk = self.chunks.insert(Chunk { + memory: Arc::new(memory), + ptr, + size: chunk_size, + }); + + let entry = candidate_size_entry.add_pair_and_acquire_left(chunk, 0, None); + + break (entry, candidate_size_index); + } + + candidate_size_index += 1; + }; + + for size_index in (size_index..entry_size_index).rev() { + let size_entry = &mut self.sizes[size_index]; + entry = + size_entry.add_pair_and_acquire_left(entry.chunk, entry.offset, Some(entry.index)); + } + + let chunk_entry = self.chunks.get_unchecked(entry.chunk); + + debug_assert!( + entry + .offset + .checked_add(size) + .map_or(false, |end| end <= chunk_entry.size), + "Offset + size is not in chunk bounds" + ); + + Ok(BuddyBlock { + memory: chunk_entry.memory.clone(), + ptr: chunk_entry + .ptr + .map(|ptr| NonNull::new_unchecked(ptr.as_ptr().add(entry.offset as usize))), + offset: entry.offset, + size, + chunk: entry.chunk, + index: entry.index, + }) + } + + #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))] + pub unsafe fn dealloc( + &mut self, + device: &impl MemoryDevice<M>, + block: BuddyBlock<M>, + heap: &mut Heap, + allocations_remains: &mut u32, + ) { + debug_assert!(block.size.is_power_of_two()); + + let size_index = + (block.size.trailing_zeros() - self.minimal_size.trailing_zeros()) as usize; + + let mut release_index = block.index; + let mut release_size_index = size_index; + + loop { + match self.sizes[release_size_index].release(release_index) { + Release::Parent(parent) => { + release_size_index += 1; + release_index = parent; + } + Release::Chunk(chunk) => { + debug_assert_eq!(chunk, block.chunk); + debug_assert_eq!( + self.chunks.get(chunk).size, + self.minimal_size << (release_size_index + 1) + ); + let chunk = self.chunks.remove(chunk); + drop(block); + + let memory = try_arc_unwrap(chunk.memory) + .expect("Memory shared after last block deallocated"); + + device.deallocate_memory(memory); + *allocations_remains += 1; + heap.dealloc(chunk.size); + + return; + } + Release::None => return, + } + } + } + + fn host_visible(&self) -> bool { + self.props.contains(MemoryPropertyFlags::HOST_VISIBLE) + } +} diff --git a/third_party/rust/gpu-alloc/src/config.rs b/third_party/rust/gpu-alloc/src/config.rs new file mode 100644 index 0000000000..0d9da8070a --- /dev/null +++ b/third_party/rust/gpu-alloc/src/config.rs @@ -0,0 +1,77 @@ +/// Configuration for [`GpuAllocator`] +/// +/// [`GpuAllocator`]: type.GpuAllocator +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct Config { + /// Size in bytes of request that will be served by dedicated memory object. + /// This value should be large enough to not exhaust memory object limit + /// and not use slow memory object allocation when it is not necessary. + pub dedicated_threshold: u64, + + /// Size in bytes of request that will be served by dedicated memory object if preferred. + /// This value should be large enough to not exhaust memory object limit + /// and not use slow memory object allocation when it is not necessary. + /// + /// This won't make much sense if this value is larger than `dedicated_threshold`. + pub preferred_dedicated_threshold: u64, + + /// Size in bytes of transient memory request that will be served by dedicated memory object. + /// This value should be large enough to not exhaust memory object limit + /// and not use slow memory object allocation when it is not necessary. + /// + /// This won't make much sense if this value is lesser than `dedicated_threshold`. + pub transient_dedicated_threshold: u64, + + /// Size in bytes of first chunk in free-list allocator. + pub starting_free_list_chunk: u64, + + /// Upper limit for size in bytes of chunks in free-list allocator. + pub final_free_list_chunk: u64, + + /// Minimal size for buddy allocator. + pub minimal_buddy_size: u64, + + /// Initial memory object size for buddy allocator. + /// If less than `minimal_buddy_size` then `minimal_buddy_size` is used instead. + pub initial_buddy_dedicated_size: u64, +} + +impl Config { + /// Returns default configuration. + /// + /// This is not `Default` implementation to discourage usage outside of + /// prototyping. + /// + /// Proper configuration should depend on hardware and intended usage.\ + /// But those values can be used as starting point.\ + /// Note that they can simply not work for some platforms with lesser + /// memory capacity than today's "modern" GPU (year 2020). + pub fn i_am_prototyping() -> Self { + // Assume that today's modern GPU is made of 1024 potatoes. + let potato = Config::i_am_potato(); + + Config { + dedicated_threshold: potato.dedicated_threshold * 1024, + preferred_dedicated_threshold: potato.preferred_dedicated_threshold * 1024, + transient_dedicated_threshold: potato.transient_dedicated_threshold * 1024, + starting_free_list_chunk: potato.starting_free_list_chunk * 1024, + final_free_list_chunk: potato.final_free_list_chunk * 1024, + minimal_buddy_size: potato.minimal_buddy_size * 1024, + initial_buddy_dedicated_size: potato.initial_buddy_dedicated_size * 1024, + } + } + + /// Returns default configuration for average sized potato. + pub fn i_am_potato() -> Self { + Config { + dedicated_threshold: 32 * 1024, + preferred_dedicated_threshold: 1024, + transient_dedicated_threshold: 128 * 1024, + starting_free_list_chunk: 8 * 1024, + final_free_list_chunk: 128 * 1024, + minimal_buddy_size: 1, + initial_buddy_dedicated_size: 8 * 1024, + } + } +} diff --git a/third_party/rust/gpu-alloc/src/error.rs b/third_party/rust/gpu-alloc/src/error.rs new file mode 100644 index 0000000000..95cde9bcaa --- /dev/null +++ b/third_party/rust/gpu-alloc/src/error.rs @@ -0,0 +1,116 @@ +use { + core::fmt::{self, Display}, + gpu_alloc_types::{DeviceMapError, OutOfMemory}, +}; + +/// Enumeration of possible errors that may occur during memory allocation. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum AllocationError { + /// Backend reported that device memory has been exhausted.\ + /// Deallocating device memory from the same heap may increase chance + /// that another allocation would succeed. + OutOfDeviceMemory, + + /// Backend reported that host memory has been exhausted.\ + /// Deallocating host memory may increase chance that another allocation would succeed. + OutOfHostMemory, + + /// Allocation request cannot be fulfilled as no available memory types allowed + /// by `Request.memory_types` mask is compatible with `request.usage`. + NoCompatibleMemoryTypes, + + /// Reached limit on allocated memory objects count.\ + /// Deallocating device memory may increase chance that another allocation would succeed. + /// Especially dedicated memory blocks. + /// + /// If this error is returned when memory heaps are far from exhausted + /// `Config` should be tweaked to allocate larger memory objects. + TooManyObjects, +} + +impl From<OutOfMemory> for AllocationError { + fn from(err: OutOfMemory) -> Self { + match err { + OutOfMemory::OutOfDeviceMemory => AllocationError::OutOfDeviceMemory, + OutOfMemory::OutOfHostMemory => AllocationError::OutOfHostMemory, + } + } +} + +impl Display for AllocationError { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + AllocationError::OutOfDeviceMemory => fmt.write_str("Device memory exhausted"), + AllocationError::OutOfHostMemory => fmt.write_str("Host memory exhausted"), + AllocationError::NoCompatibleMemoryTypes => fmt.write_str( + "No compatible memory types from requested types support requested usage", + ), + AllocationError::TooManyObjects => { + fmt.write_str("Reached limit on allocated memory objects count") + } + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for AllocationError {} + +/// Enumeration of possible errors that may occur during memory mapping. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum MapError { + /// Backend reported that device memory has been exhausted.\ + /// Deallocating device memory from the same heap may increase chance + /// that another mapping would succeed. + OutOfDeviceMemory, + + /// Backend reported that host memory has been exhausted.\ + /// Deallocating host memory may increase chance that another mapping would succeed. + OutOfHostMemory, + + /// Attempt to map memory block with non-host-visible memory type.\ + /// Ensure to include `UsageFlags::HOST_ACCESS` into allocation request + /// when memory mapping is intended. + NonHostVisible, + + /// Map failed for implementation specific reason.\ + /// For Vulkan backend this includes failed attempt + /// to allocate large enough virtual address space. + MapFailed, + + /// Mapping failed due to block being already mapped. + AlreadyMapped, +} + +impl From<DeviceMapError> for MapError { + fn from(err: DeviceMapError) -> Self { + match err { + DeviceMapError::OutOfDeviceMemory => MapError::OutOfDeviceMemory, + DeviceMapError::OutOfHostMemory => MapError::OutOfHostMemory, + DeviceMapError::MapFailed => MapError::MapFailed, + } + } +} + +impl From<OutOfMemory> for MapError { + fn from(err: OutOfMemory) -> Self { + match err { + OutOfMemory::OutOfDeviceMemory => MapError::OutOfDeviceMemory, + OutOfMemory::OutOfHostMemory => MapError::OutOfHostMemory, + } + } +} + +impl Display for MapError { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + MapError::OutOfDeviceMemory => fmt.write_str("Device memory exhausted"), + MapError::OutOfHostMemory => fmt.write_str("Host memory exhausted"), + MapError::MapFailed => fmt.write_str("Failed to map memory object"), + MapError::NonHostVisible => fmt.write_str("Impossible to map non-host-visible memory"), + MapError::AlreadyMapped => fmt.write_str("Block is already mapped"), + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for MapError {} diff --git a/third_party/rust/gpu-alloc/src/freelist.rs b/third_party/rust/gpu-alloc/src/freelist.rs new file mode 100644 index 0000000000..448a961d4f --- /dev/null +++ b/third_party/rust/gpu-alloc/src/freelist.rs @@ -0,0 +1,528 @@ +use { + crate::{ + align_down, align_up, + error::AllocationError, + heap::Heap, + util::{arc_unwrap, is_arc_unique}, + MemoryBounds, + }, + alloc::{sync::Arc, vec::Vec}, + core::{cmp::Ordering, ptr::NonNull}, + gpu_alloc_types::{AllocationFlags, DeviceMapError, MemoryDevice, MemoryPropertyFlags}, +}; + +unsafe fn opt_ptr_add(ptr: Option<NonNull<u8>>, size: u64) -> Option<NonNull<u8>> { + ptr.map(|ptr| { + // Size is within memory region started at `ptr`. + // size is within `chunk_size` that fits `isize`. + NonNull::new_unchecked(ptr.as_ptr().offset(size as isize)) + }) +} + +#[derive(Debug)] +pub(super) struct FreeList<M> { + array: Vec<FreeListRegion<M>>, + counter: u64, +} + +impl<M> FreeList<M> { + pub fn new() -> Self { + FreeList { + array: Vec::new(), + counter: 0, + } + } + + pub fn get_block_from_new_memory( + &mut self, + memory: Arc<M>, + memory_size: u64, + ptr: Option<NonNull<u8>>, + align_mask: u64, + size: u64, + ) -> FreeListBlock<M> { + debug_assert!(size <= memory_size); + + self.counter += 1; + self.array.push(FreeListRegion { + memory, + ptr, + chunk: self.counter, + start: 0, + end: memory_size, + }); + self.get_block_at(self.array.len() - 1, align_mask, size) + } + + pub fn get_block(&mut self, align_mask: u64, size: u64) -> Option<FreeListBlock<M>> { + let (index, _) = self.array.iter().enumerate().rev().find(|(_, region)| { + match region.end.checked_sub(size) { + Some(start) => { + let aligned_start = align_down(start, align_mask); + aligned_start >= region.start + } + None => false, + } + })?; + + Some(self.get_block_at(index, align_mask, size)) + } + + fn get_block_at(&mut self, index: usize, align_mask: u64, size: u64) -> FreeListBlock<M> { + let region = &mut self.array[index]; + + let start = region.end - size; + let aligned_start = align_down(start, align_mask); + + if aligned_start > region.start { + let block = FreeListBlock { + offset: aligned_start, + size: region.end - aligned_start, + chunk: region.chunk, + ptr: unsafe { opt_ptr_add(region.ptr, aligned_start - region.start) }, + memory: region.memory.clone(), + }; + + region.end = aligned_start; + + block + } else { + debug_assert_eq!(aligned_start, region.start); + let region = self.array.remove(index); + region.into_block() + } + } + + pub fn insert_block(&mut self, block: FreeListBlock<M>) { + match self.array.binary_search_by(|b| b.cmp(&block)) { + Ok(_) => { + panic!("Overlapping block found in free list"); + } + Err(index) if self.array.len() > index => match &mut self.array[..=index] { + [] => unreachable!(), + [next] => { + debug_assert!(!next.is_suffix_block(&block)); + + if next.is_prefix_block(&block) { + next.merge_prefix_block(block); + } else { + self.array.insert(0, FreeListRegion::from_block(block)); + } + } + [.., prev, next] => { + debug_assert!(!prev.is_prefix_block(&block)); + debug_assert!(!next.is_suffix_block(&block)); + + if next.is_prefix_block(&block) { + next.merge_prefix_block(block); + + if prev.consecutive(&*next) { + let next = self.array.remove(index); + let prev = &mut self.array[index - 1]; + prev.merge(next); + } + } else if prev.is_suffix_block(&block) { + prev.merge_suffix_block(block); + } else { + self.array.insert(index, FreeListRegion::from_block(block)); + } + } + }, + Err(_) => match &mut self.array[..] { + [] => self.array.push(FreeListRegion::from_block(block)), + [.., prev] => { + debug_assert!(!prev.is_prefix_block(&block)); + if prev.is_suffix_block(&block) { + prev.merge_suffix_block(block); + } else { + self.array.push(FreeListRegion::from_block(block)); + } + } + }, + } + } + + pub fn drain(&mut self, keep_last: bool) -> Option<impl Iterator<Item = (M, u64)> + '_> { + // Time to deallocate + + let len = self.array.len(); + + let mut del = 0; + { + let regions = &mut self.array[..]; + + for i in 0..len { + if (i < len - 1 || !keep_last) && is_arc_unique(&mut regions[i].memory) { + del += 1; + } else if del > 0 { + regions.swap(i - del, i); + } + } + } + + if del > 0 { + Some(self.array.drain(len - del..).map(move |region| { + debug_assert_eq!(region.start, 0); + (unsafe { arc_unwrap(region.memory) }, region.end) + })) + } else { + None + } + } +} + +#[derive(Debug)] +struct FreeListRegion<M> { + memory: Arc<M>, + ptr: Option<NonNull<u8>>, + chunk: u64, + start: u64, + end: u64, +} + +unsafe impl<M> Sync for FreeListRegion<M> where M: Sync {} +unsafe impl<M> Send for FreeListRegion<M> where M: Send {} + +impl<M> FreeListRegion<M> { + pub fn cmp(&self, block: &FreeListBlock<M>) -> Ordering { + debug_assert_eq!( + Arc::ptr_eq(&self.memory, &block.memory), + self.chunk == block.chunk + ); + + if self.chunk == block.chunk { + debug_assert_eq!( + Ord::cmp(&self.start, &block.offset), + Ord::cmp(&self.end, &(block.offset + block.size)), + "Free region {{ start: {}, end: {} }} overlaps with block {{ offset: {}, size: {} }}", + self.start, + self.end, + block.offset, + block.size, + ); + } + + Ord::cmp(&self.chunk, &block.chunk).then(Ord::cmp(&self.start, &block.offset)) + } + + fn from_block(block: FreeListBlock<M>) -> Self { + FreeListRegion { + memory: block.memory, + chunk: block.chunk, + ptr: block.ptr, + start: block.offset, + end: block.offset + block.size, + } + } + + fn into_block(self) -> FreeListBlock<M> { + FreeListBlock { + memory: self.memory, + chunk: self.chunk, + ptr: self.ptr, + offset: self.start, + size: self.end - self.start, + } + } + + fn consecutive(&self, other: &Self) -> bool { + if self.chunk != other.chunk { + return false; + } + + debug_assert!(Arc::ptr_eq(&self.memory, &other.memory)); + + debug_assert_eq!( + Ord::cmp(&self.start, &other.start), + Ord::cmp(&self.end, &other.end) + ); + + self.end == other.start + } + + fn merge(&mut self, next: FreeListRegion<M>) { + debug_assert!(self.consecutive(&next)); + self.end = next.end; + } + + fn is_prefix_block(&self, block: &FreeListBlock<M>) -> bool { + if self.chunk != block.chunk { + return false; + } + + debug_assert!(Arc::ptr_eq(&self.memory, &block.memory)); + + debug_assert_eq!( + Ord::cmp(&self.start, &block.offset), + Ord::cmp(&self.end, &(block.offset + block.size)) + ); + + self.start == (block.offset + block.size) + } + + fn merge_prefix_block(&mut self, block: FreeListBlock<M>) { + debug_assert!(self.is_prefix_block(&block)); + self.start = block.offset; + self.ptr = block.ptr; + } + + fn is_suffix_block(&self, block: &FreeListBlock<M>) -> bool { + if self.chunk != block.chunk { + return false; + } + + debug_assert!(Arc::ptr_eq(&self.memory, &block.memory)); + + debug_assert_eq!( + Ord::cmp(&self.start, &block.offset), + Ord::cmp(&self.end, &(block.offset + block.size)) + ); + + self.end == block.offset + } + + fn merge_suffix_block(&mut self, block: FreeListBlock<M>) { + debug_assert!(self.is_suffix_block(&block)); + self.end += block.size; + } +} + +#[derive(Debug)] +pub struct FreeListBlock<M> { + pub memory: Arc<M>, + pub ptr: Option<NonNull<u8>>, + pub chunk: u64, + pub offset: u64, + pub size: u64, +} + +unsafe impl<M> Sync for FreeListBlock<M> where M: Sync {} +unsafe impl<M> Send for FreeListBlock<M> where M: Send {} + +#[derive(Debug)] +pub(crate) struct FreeListAllocator<M> { + freelist: FreeList<M>, + chunk_size: u64, + final_chunk_size: u64, + memory_type: u32, + props: MemoryPropertyFlags, + atom_mask: u64, + + total_allocations: u64, + total_deallocations: u64, +} + +impl<M> Drop for FreeListAllocator<M> { + fn drop(&mut self) { + match Ord::cmp(&self.total_allocations, &self.total_deallocations) { + Ordering::Equal => {} + Ordering::Greater => { + report_error_on_drop!("Not all blocks were deallocated") + } + Ordering::Less => { + report_error_on_drop!("More blocks deallocated than allocated") + } + } + + if !self.freelist.array.is_empty() { + report_error_on_drop!( + "FreeListAllocator has free blocks on drop. Allocator should be cleaned" + ); + } + } +} + +impl<M> FreeListAllocator<M> +where + M: MemoryBounds + 'static, +{ + pub fn new( + starting_chunk_size: u64, + final_chunk_size: u64, + memory_type: u32, + props: MemoryPropertyFlags, + atom_mask: u64, + ) -> Self { + debug_assert_eq!( + align_down(starting_chunk_size, atom_mask), + starting_chunk_size + ); + + let starting_chunk_size = min(starting_chunk_size, isize::max_value()); + + debug_assert_eq!(align_down(final_chunk_size, atom_mask), final_chunk_size); + let final_chunk_size = min(final_chunk_size, isize::max_value()); + + FreeListAllocator { + freelist: FreeList::new(), + chunk_size: starting_chunk_size, + final_chunk_size, + memory_type, + props, + atom_mask, + + total_allocations: 0, + total_deallocations: 0, + } + } + + #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))] + pub unsafe fn alloc( + &mut self, + device: &impl MemoryDevice<M>, + size: u64, + align_mask: u64, + flags: AllocationFlags, + heap: &mut Heap, + allocations_remains: &mut u32, + ) -> Result<FreeListBlock<M>, AllocationError> { + debug_assert!( + self.final_chunk_size >= size, + "GpuAllocator must not request allocations equal or greater to chunks size" + ); + + let size = align_up(size, self.atom_mask).expect( + "Any value not greater than final chunk size (which is aligned) has to fit for alignment", + ); + + let align_mask = align_mask | self.atom_mask; + let host_visible = self.host_visible(); + + if size <= self.chunk_size { + // Otherwise there can't be any sufficiently large free blocks + if let Some(block) = self.freelist.get_block(align_mask, size) { + self.total_allocations += 1; + return Ok(block); + } + } + + // New allocation is required. + if *allocations_remains == 0 { + return Err(AllocationError::TooManyObjects); + } + + if size > self.chunk_size { + let multiple = (size - 1) / self.chunk_size + 1; + let multiple = multiple.next_power_of_two(); + + self.chunk_size = (self.chunk_size * multiple).min(self.final_chunk_size); + } + + let mut memory = device.allocate_memory(self.chunk_size, self.memory_type, flags)?; + *allocations_remains -= 1; + heap.alloc(self.chunk_size); + + // Map host visible allocations + let ptr = if host_visible { + match device.map_memory(&mut memory, 0, self.chunk_size) { + Ok(ptr) => Some(ptr), + Err(DeviceMapError::MapFailed) => { + #[cfg(feature = "tracing")] + tracing::error!("Failed to map host-visible memory in linear allocator"); + device.deallocate_memory(memory); + *allocations_remains += 1; + heap.dealloc(self.chunk_size); + + return Err(AllocationError::OutOfHostMemory); + } + Err(DeviceMapError::OutOfDeviceMemory) => { + return Err(AllocationError::OutOfDeviceMemory); + } + Err(DeviceMapError::OutOfHostMemory) => { + return Err(AllocationError::OutOfHostMemory); + } + } + } else { + None + }; + + let memory = Arc::new(memory); + let block = + self.freelist + .get_block_from_new_memory(memory, self.chunk_size, ptr, align_mask, size); + + if self.chunk_size < self.final_chunk_size { + // Double next chunk size + // Limit to final value. + self.chunk_size = (self.chunk_size * 2).min(self.final_chunk_size); + } + + self.total_allocations += 1; + Ok(block) + } + + #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))] + pub unsafe fn dealloc( + &mut self, + device: &impl MemoryDevice<M>, + block: FreeListBlock<M>, + heap: &mut Heap, + allocations_remains: &mut u32, + ) { + debug_assert!(block.size < self.chunk_size); + debug_assert_ne!(block.size, 0); + self.freelist.insert_block(block); + self.total_deallocations += 1; + + if let Some(memory) = self.freelist.drain(true) { + memory.for_each(|(memory, size)| { + device.deallocate_memory(memory); + *allocations_remains += 1; + heap.dealloc(size); + }); + } + } + + /// Deallocates leftover memory objects. + /// Should be used before dropping. + /// + /// # Safety + /// + /// * `device` must be one with `DeviceProperties` that were provided to create this `GpuAllocator` instance + /// * Same `device` instance must be used for all interactions with one `GpuAllocator` instance + /// and memory blocks allocated from it + #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))] + pub unsafe fn cleanup( + &mut self, + device: &impl MemoryDevice<M>, + heap: &mut Heap, + allocations_remains: &mut u32, + ) { + if let Some(memory) = self.freelist.drain(false) { + memory.for_each(|(memory, size)| { + device.deallocate_memory(memory); + *allocations_remains += 1; + heap.dealloc(size); + }); + } + + #[cfg(feature = "tracing")] + { + if self.total_allocations == self.total_deallocations && !self.freelist.array.is_empty() + { + tracing::error!( + "Some regions were not deallocated on cleanup, although all blocks are free. + This is a bug in `FreeBlockAllocator`. + See array of free blocks left: + {:#?}", + self.freelist.array, + ); + } + } + } + + fn host_visible(&self) -> bool { + self.props.contains(MemoryPropertyFlags::HOST_VISIBLE) + } +} + +fn min<L, R>(l: L, r: R) -> L +where + R: core::convert::TryInto<L>, + L: Ord, +{ + match r.try_into() { + Ok(r) => core::cmp::min(l, r), + Err(_) => l, + } +} diff --git a/third_party/rust/gpu-alloc/src/heap.rs b/third_party/rust/gpu-alloc/src/heap.rs new file mode 100644 index 0000000000..fca7538919 --- /dev/null +++ b/third_party/rust/gpu-alloc/src/heap.rs @@ -0,0 +1,32 @@ +#[derive(Debug)] +pub(crate) struct Heap { + size: u64, + used: u64, + allocated: u128, + deallocated: u128, +} + +impl Heap { + pub(crate) fn new(size: u64) -> Self { + Heap { + size, + used: 0, + allocated: 0, + deallocated: 0, + } + } + + pub(crate) fn size(&mut self) -> u64 { + self.size + } + + pub(crate) fn alloc(&mut self, size: u64) { + self.used += size; + self.allocated += u128::from(size); + } + + pub(crate) fn dealloc(&mut self, size: u64) { + self.used -= size; + self.deallocated += u128::from(size); + } +} diff --git a/third_party/rust/gpu-alloc/src/lib.rs b/third_party/rust/gpu-alloc/src/lib.rs new file mode 100644 index 0000000000..97d04fac7a --- /dev/null +++ b/third_party/rust/gpu-alloc/src/lib.rs @@ -0,0 +1,124 @@ +//! +//! Implementation agnostic memory allocator for Vulkan like APIs. +//! +//! This crate is intended to be used as part of safe API implementations.\ +//! Use with caution. There are unsafe functions all over the place. +//! +//! # Usage +//! +//! Start with fetching `DeviceProperties` from `gpu-alloc-<backend>` crate for the backend of choice.\ +//! Then create `GpuAllocator` instance and use it for all device memory allocations.\ +//! `GpuAllocator` will take care for all necessary bookkeeping like memory object count limit, +//! heap budget and memory mapping. +//! +//! ### Backends implementations +//! +//! Backend supporting crates should not depend on this crate.\ +//! Instead they should depend on `gpu-alloc-types` which is much more stable, +//! allowing to upgrade `gpu-alloc` version without `gpu-alloc-<backend>` upgrade. +//! + +#![cfg_attr(not(feature = "std"), no_std)] + +extern crate alloc; + +#[cfg(feature = "tracing")] +macro_rules! report_error_on_drop { + ($($tokens:tt)*) => {{ + #[cfg(feature = "std")] + { + if std::thread::panicking() { + return; + } + } + + tracing::error!($($tokens)*) + }}; +} + +#[cfg(all(not(feature = "tracing"), feature = "std"))] +macro_rules! report_error_on_drop { + ($($tokens:tt)*) => {{ + if std::thread::panicking() { + return; + } + eprintln!($($tokens)*) + }}; +} + +#[cfg(all(not(feature = "tracing"), not(feature = "std")))] +macro_rules! report_error_on_drop { + ($($tokens:tt)*) => {{ + panic!($($tokens)*) + }}; +} + +mod allocator; +mod block; +mod buddy; +mod config; +mod error; +mod freelist; +mod heap; +mod slab; +mod usage; +mod util; + +pub use { + self::{allocator::*, block::MemoryBlock, config::*, error::*, usage::*}, + gpu_alloc_types::*, +}; + +/// Memory request for allocator. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct Request { + /// Minimal size of memory block required. + /// Returned block may have larger size, + /// use `MemoryBlock::size` to learn actual size of returned block. + pub size: u64, + + /// Minimal alignment mask required. + /// Returned block may have larger alignment, + /// use `MemoryBlock::align` to learn actual alignment of returned block. + pub align_mask: u64, + + /// Intended memory usage. + /// Returned block may support additional usages, + /// use `MemoryBlock::props` to learn memory properties of returned block. + pub usage: UsageFlags, + + /// Bitset for memory types. + /// Returned block will be from memory type corresponding to one of set bits, + /// use `MemoryBlock::memory_type` to learn memory type index of returned block. + pub memory_types: u32, +} + +/// Aligns `value` up to `align_mask` +/// Returns smallest integer not lesser than `value` aligned by `align_mask`. +/// Returns `None` on overflow. +pub(crate) fn align_up(value: u64, align_mask: u64) -> Option<u64> { + Some(value.checked_add(align_mask)? & !align_mask) +} + +/// Align `value` down to `align_mask` +/// Returns largest integer not bigger than `value` aligned by `align_mask`. +pub(crate) fn align_down(value: u64, align_mask: u64) -> u64 { + value & !align_mask +} + +#[cfg(debug_assertions)] +#[allow(unused_unsafe)] +unsafe fn unreachable_unchecked() -> ! { + unreachable!() +} + +#[cfg(not(debug_assertions))] +unsafe fn unreachable_unchecked() -> ! { + core::hint::unreachable_unchecked() +} + +// #[cfg(feature = "tracing")] +use core::fmt::Debug as MemoryBounds; + +// #[cfg(not(feature = "tracing"))] +// use core::any::Any as MemoryBounds; diff --git a/third_party/rust/gpu-alloc/src/slab.rs b/third_party/rust/gpu-alloc/src/slab.rs new file mode 100644 index 0000000000..b65c73e656 --- /dev/null +++ b/third_party/rust/gpu-alloc/src/slab.rs @@ -0,0 +1,97 @@ +use {crate::unreachable_unchecked, alloc::vec::Vec, core::mem::replace}; + +#[derive(Debug)] +enum Entry<T> { + Vacant(usize), + Occupied(T), +} +#[derive(Debug)] +pub(crate) struct Slab<T> { + next_vacant: usize, + entries: Vec<Entry<T>>, +} + +impl<T> Slab<T> { + pub fn new() -> Self { + Slab { + next_vacant: !0, + entries: Vec::new(), + } + } + + /// Inserts value into this linked vec and returns index + /// at which value can be accessed in constant time. + pub fn insert(&mut self, value: T) -> usize { + if self.next_vacant >= self.entries.len() { + self.entries.push(Entry::Occupied(value)); + self.entries.len() - 1 + } else { + match *unsafe { self.entries.get_unchecked(self.next_vacant) } { + Entry::Vacant(next_vacant) => { + unsafe { + *self.entries.get_unchecked_mut(self.next_vacant) = Entry::Occupied(value); + } + replace(&mut self.next_vacant, next_vacant) + } + _ => unsafe { unreachable_unchecked() }, + } + } + } + + pub fn len(&self) -> usize { + self.entries.len() + } + + pub unsafe fn get_unchecked(&self, index: usize) -> &T { + debug_assert!(index < self.len()); + + match self.entries.get_unchecked(index) { + Entry::Occupied(value) => value, + _ => unreachable_unchecked(), + } + } + + pub unsafe fn get_unchecked_mut(&mut self, index: usize) -> &mut T { + debug_assert!(index < self.len()); + + match self.entries.get_unchecked_mut(index) { + Entry::Occupied(value) => value, + _ => unreachable_unchecked(), + } + } + + pub fn get(&self, index: usize) -> &T { + match self.entries.get(index) { + Some(Entry::Occupied(value)) => value, + _ => panic!("Invalid index"), + } + } + + pub fn get_mut(&mut self, index: usize) -> &mut T { + match self.entries.get_mut(index) { + Some(Entry::Occupied(value)) => value, + _ => panic!("Invalid index"), + } + } + + pub unsafe fn remove_unchecked(&mut self, index: usize) -> T { + let entry = replace( + self.entries.get_unchecked_mut(index), + Entry::Vacant(self.next_vacant), + ); + + self.next_vacant = index; + + match entry { + Entry::Occupied(value) => value, + _ => unreachable_unchecked(), + } + } + + pub fn remove(&mut self, index: usize) -> T { + match self.entries.get_mut(index) { + Some(Entry::Occupied(_)) => unsafe { self.remove_unchecked(index) }, + _ => panic!("Invalid index"), + } + } +} diff --git a/third_party/rust/gpu-alloc/src/usage.rs b/third_party/rust/gpu-alloc/src/usage.rs new file mode 100644 index 0000000000..9834ef41c2 --- /dev/null +++ b/third_party/rust/gpu-alloc/src/usage.rs @@ -0,0 +1,167 @@ +use { + core::fmt::{self, Debug}, + gpu_alloc_types::{MemoryPropertyFlags, MemoryType}, +}; + +bitflags::bitflags! { + /// Memory usage type. + /// Bits set define intended usage for requested memory. + #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] + pub struct UsageFlags: u8 { + /// Hints for allocator to find memory with faster device access. + /// If no flags is specified than `FAST_DEVICE_ACCESS` is implied. + const FAST_DEVICE_ACCESS = 0x01; + + /// Memory will be accessed from host. + /// This flags guarantees that host memory operations will be available. + /// Otherwise implementation is encouraged to use non-host-accessible memory. + const HOST_ACCESS = 0x02; + + /// Hints allocator that memory will be used for data downloading. + /// Allocator will strongly prefer host-cached memory. + /// Implies `HOST_ACCESS` flag. + const DOWNLOAD = 0x04; + + /// Hints allocator that memory will be used for data uploading. + /// If `DOWNLOAD` flag is not set then allocator will assume that + /// host will access memory in write-only manner and may + /// pick not host-cached. + /// Implies `HOST_ACCESS` flag. + const UPLOAD = 0x08; + + /// Hints allocator that memory will be used for short duration + /// allowing to use faster algorithm with less memory overhead. + /// If use holds returned memory block for too long then + /// effective memory overhead increases instead. + /// Best use case is for staging buffer for single batch of operations. + const TRANSIENT = 0x10; + + /// Requests memory that can be addressed with `u64`. + /// Allows fetching device address for resources bound to that memory. + const DEVICE_ADDRESS = 0x20; + } +} + +#[derive(Clone, Copy, Debug)] +struct MemoryForOneUsage { + mask: u32, + types: [u32; 32], + types_count: u32, +} + +pub(crate) struct MemoryForUsage { + usages: [MemoryForOneUsage; 64], +} + +impl Debug for MemoryForUsage { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("MemoryForUsage") + .field("usages", &&self.usages[..]) + .finish() + } +} + +impl MemoryForUsage { + pub fn new(memory_types: &[MemoryType]) -> Self { + assert!( + memory_types.len() <= 32, + "Only up to 32 memory types supported" + ); + + let mut mfu = MemoryForUsage { + usages: [MemoryForOneUsage { + mask: 0, + types: [0; 32], + types_count: 0, + }; 64], + }; + + for usage in 0..64 { + mfu.usages[usage as usize] = + one_usage(UsageFlags::from_bits_truncate(usage), memory_types); + } + + mfu + } + + /// Returns mask with bits set for memory type indices that support the + /// usage. + pub fn mask(&self, usage: UsageFlags) -> u32 { + self.usages[usage.bits() as usize].mask + } + + /// Returns slice of memory type indices that support the usage. + /// Earlier memory type has priority over later. + pub fn types(&self, usage: UsageFlags) -> &[u32] { + let usage = &self.usages[usage.bits() as usize]; + &usage.types[..usage.types_count as usize] + } +} + +fn one_usage(usage: UsageFlags, memory_types: &[MemoryType]) -> MemoryForOneUsage { + let mut types = [0; 32]; + let mut types_count = 0; + + for (index, mt) in memory_types.iter().enumerate() { + if compatible(usage, mt.props) { + types[types_count as usize] = index as u32; + types_count += 1; + } + } + + types[..types_count as usize] + .sort_unstable_by_key(|&index| priority(usage, memory_types[index as usize].props)); + + let mask = types[..types_count as usize] + .iter() + .fold(0u32, |mask, index| mask | 1u32 << index); + + MemoryForOneUsage { + mask, + types, + types_count, + } +} + +fn compatible(usage: UsageFlags, flags: MemoryPropertyFlags) -> bool { + type Flags = MemoryPropertyFlags; + if flags.contains(Flags::LAZILY_ALLOCATED) || flags.contains(Flags::PROTECTED) { + // Unsupported + false + } else if usage.intersects(UsageFlags::HOST_ACCESS | UsageFlags::UPLOAD | UsageFlags::DOWNLOAD) + { + // Requires HOST_VISIBLE + flags.contains(Flags::HOST_VISIBLE) + } else { + true + } +} + +/// Returns priority of memory with specified flags for specified usage. +/// Lesser value returned = more prioritized. +fn priority(usage: UsageFlags, flags: MemoryPropertyFlags) -> u32 { + type Flags = MemoryPropertyFlags; + + // Highly prefer device local memory when `FAST_DEVICE_ACCESS` usage is specified + // or usage is empty. + let device_local: bool = flags.contains(Flags::DEVICE_LOCAL) + ^ (usage.is_empty() || usage.contains(UsageFlags::FAST_DEVICE_ACCESS)); + + assert!( + flags.contains(Flags::HOST_VISIBLE) + || !usage + .intersects(UsageFlags::HOST_ACCESS | UsageFlags::UPLOAD | UsageFlags::DOWNLOAD) + ); + + // Prefer cached memory for downloads. + // Or non-cached if downloads are not expected. + let cached: bool = flags.contains(Flags::HOST_CACHED) ^ usage.contains(UsageFlags::DOWNLOAD); + + // Prefer coherent for both uploads and downloads. + // Prefer non-coherent if neither flags is set. + let coherent: bool = flags.contains(Flags::HOST_COHERENT) + ^ (usage.intersects(UsageFlags::UPLOAD | UsageFlags::DOWNLOAD)); + + // Each boolean is false if flags are preferred. + device_local as u32 * 4 + cached as u32 * 2 + coherent as u32 +} diff --git a/third_party/rust/gpu-alloc/src/util.rs b/third_party/rust/gpu-alloc/src/util.rs new file mode 100644 index 0000000000..f02259d958 --- /dev/null +++ b/third_party/rust/gpu-alloc/src/util.rs @@ -0,0 +1,44 @@ +use alloc::sync::Arc; + +/// Guarantees uniqueness only if `Weak` pointers are never created +/// from this `Arc` or clones. +pub(crate) fn is_arc_unique<M>(arc: &mut Arc<M>) -> bool { + let strong_count = Arc::strong_count(&*arc); + debug_assert_ne!(strong_count, 0, "This Arc should exist"); + + debug_assert!( + strong_count > 1 || Arc::get_mut(arc).is_some(), + "`Weak` pointer exists" + ); + + strong_count == 1 +} + +/// Can be used instead of `Arc::try_unwrap(arc).unwrap()` +/// when it is guaranteed to succeed. +pub(crate) unsafe fn arc_unwrap<M>(mut arc: Arc<M>) -> M { + use core::{mem::ManuallyDrop, ptr::read}; + debug_assert!(is_arc_unique(&mut arc)); + + // Get raw pointer to inner value. + let raw = Arc::into_raw(arc); + + // As `Arc` is unique and no Weak pointers exist + // it won't be dereferenced elsewhere. + let inner = read(raw); + + // Cast to `ManuallyDrop` which guarantees to have same layout + // and will skip dropping. + drop(Arc::from_raw(raw as *const ManuallyDrop<M>)); + inner +} + +/// Can be used instead of `Arc::try_unwrap` +/// only if `Weak` pointers are never created from this `Arc` or clones. +pub(crate) unsafe fn try_arc_unwrap<M>(mut arc: Arc<M>) -> Option<M> { + if is_arc_unique(&mut arc) { + Some(arc_unwrap(arc)) + } else { + None + } +} |