diff options
Diffstat (limited to '')
-rw-r--r-- | third_party/rust/gpu-alloc/src/usage.rs | 167 |
1 files changed, 167 insertions, 0 deletions
diff --git a/third_party/rust/gpu-alloc/src/usage.rs b/third_party/rust/gpu-alloc/src/usage.rs new file mode 100644 index 0000000000..9834ef41c2 --- /dev/null +++ b/third_party/rust/gpu-alloc/src/usage.rs @@ -0,0 +1,167 @@ +use { + core::fmt::{self, Debug}, + gpu_alloc_types::{MemoryPropertyFlags, MemoryType}, +}; + +bitflags::bitflags! { + /// Memory usage type. + /// Bits set define intended usage for requested memory. + #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] + pub struct UsageFlags: u8 { + /// Hints for allocator to find memory with faster device access. + /// If no flags is specified than `FAST_DEVICE_ACCESS` is implied. + const FAST_DEVICE_ACCESS = 0x01; + + /// Memory will be accessed from host. + /// This flags guarantees that host memory operations will be available. + /// Otherwise implementation is encouraged to use non-host-accessible memory. + const HOST_ACCESS = 0x02; + + /// Hints allocator that memory will be used for data downloading. + /// Allocator will strongly prefer host-cached memory. + /// Implies `HOST_ACCESS` flag. + const DOWNLOAD = 0x04; + + /// Hints allocator that memory will be used for data uploading. + /// If `DOWNLOAD` flag is not set then allocator will assume that + /// host will access memory in write-only manner and may + /// pick not host-cached. + /// Implies `HOST_ACCESS` flag. + const UPLOAD = 0x08; + + /// Hints allocator that memory will be used for short duration + /// allowing to use faster algorithm with less memory overhead. + /// If use holds returned memory block for too long then + /// effective memory overhead increases instead. + /// Best use case is for staging buffer for single batch of operations. + const TRANSIENT = 0x10; + + /// Requests memory that can be addressed with `u64`. + /// Allows fetching device address for resources bound to that memory. + const DEVICE_ADDRESS = 0x20; + } +} + +#[derive(Clone, Copy, Debug)] +struct MemoryForOneUsage { + mask: u32, + types: [u32; 32], + types_count: u32, +} + +pub(crate) struct MemoryForUsage { + usages: [MemoryForOneUsage; 64], +} + +impl Debug for MemoryForUsage { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("MemoryForUsage") + .field("usages", &&self.usages[..]) + .finish() + } +} + +impl MemoryForUsage { + pub fn new(memory_types: &[MemoryType]) -> Self { + assert!( + memory_types.len() <= 32, + "Only up to 32 memory types supported" + ); + + let mut mfu = MemoryForUsage { + usages: [MemoryForOneUsage { + mask: 0, + types: [0; 32], + types_count: 0, + }; 64], + }; + + for usage in 0..64 { + mfu.usages[usage as usize] = + one_usage(UsageFlags::from_bits_truncate(usage), memory_types); + } + + mfu + } + + /// Returns mask with bits set for memory type indices that support the + /// usage. + pub fn mask(&self, usage: UsageFlags) -> u32 { + self.usages[usage.bits() as usize].mask + } + + /// Returns slice of memory type indices that support the usage. + /// Earlier memory type has priority over later. + pub fn types(&self, usage: UsageFlags) -> &[u32] { + let usage = &self.usages[usage.bits() as usize]; + &usage.types[..usage.types_count as usize] + } +} + +fn one_usage(usage: UsageFlags, memory_types: &[MemoryType]) -> MemoryForOneUsage { + let mut types = [0; 32]; + let mut types_count = 0; + + for (index, mt) in memory_types.iter().enumerate() { + if compatible(usage, mt.props) { + types[types_count as usize] = index as u32; + types_count += 1; + } + } + + types[..types_count as usize] + .sort_unstable_by_key(|&index| priority(usage, memory_types[index as usize].props)); + + let mask = types[..types_count as usize] + .iter() + .fold(0u32, |mask, index| mask | 1u32 << index); + + MemoryForOneUsage { + mask, + types, + types_count, + } +} + +fn compatible(usage: UsageFlags, flags: MemoryPropertyFlags) -> bool { + type Flags = MemoryPropertyFlags; + if flags.contains(Flags::LAZILY_ALLOCATED) || flags.contains(Flags::PROTECTED) { + // Unsupported + false + } else if usage.intersects(UsageFlags::HOST_ACCESS | UsageFlags::UPLOAD | UsageFlags::DOWNLOAD) + { + // Requires HOST_VISIBLE + flags.contains(Flags::HOST_VISIBLE) + } else { + true + } +} + +/// Returns priority of memory with specified flags for specified usage. +/// Lesser value returned = more prioritized. +fn priority(usage: UsageFlags, flags: MemoryPropertyFlags) -> u32 { + type Flags = MemoryPropertyFlags; + + // Highly prefer device local memory when `FAST_DEVICE_ACCESS` usage is specified + // or usage is empty. + let device_local: bool = flags.contains(Flags::DEVICE_LOCAL) + ^ (usage.is_empty() || usage.contains(UsageFlags::FAST_DEVICE_ACCESS)); + + assert!( + flags.contains(Flags::HOST_VISIBLE) + || !usage + .intersects(UsageFlags::HOST_ACCESS | UsageFlags::UPLOAD | UsageFlags::DOWNLOAD) + ); + + // Prefer cached memory for downloads. + // Or non-cached if downloads are not expected. + let cached: bool = flags.contains(Flags::HOST_CACHED) ^ usage.contains(UsageFlags::DOWNLOAD); + + // Prefer coherent for both uploads and downloads. + // Prefer non-coherent if neither flags is set. + let coherent: bool = flags.contains(Flags::HOST_COHERENT) + ^ (usage.intersects(UsageFlags::UPLOAD | UsageFlags::DOWNLOAD)); + + // Each boolean is false if flags are preferred. + device_local as u32 * 4 + cached as u32 * 2 + coherent as u32 +} |