# Handling Zero-Sized Types

It's time. We're going to fight the specter that is zero-sized types. Safe Rust
*never* needs to care about this, but Vec is very intensive on raw pointers and
raw allocations, which are exactly the two things that care about
zero-sized types. We need to be careful of two things:

* The raw allocator API has undefined behavior if you pass in 0 for an
  allocation size.
* Raw pointer offsets are no-ops for zero-sized types, which will break our
  C-style pointer iterator.

Thankfully we abstracted out pointer iteration and allocation handling into
`RawValIter` and `RawVec` respectively. How mysteriously convenient.

## Allocating Zero-Sized Types

So if the allocator API doesn't support zero-sized allocations, what on earth
do we store as our allocation? `NonNull::dangling()` of course! Almost every
operation with a ZST is a no-op since ZSTs have exactly one value, and
therefore no state needs to be considered to store or load them. This actually
extends to `ptr::read` and `ptr::write`: they won't actually look at the
pointer at all. As such we never need to change the pointer.

Note however that our previous reliance on running out of memory before
overflow is no longer valid with zero-sized types. We must explicitly guard
against capacity overflow for zero-sized types.

Due to our current architecture, all this means is writing three guards, one in
each method of `RawVec`:

<!-- ignore: simplified code -->
```rust,ignore
impl<T> RawVec<T> {
    fn new() -> Self {
        // `!0` is `usize::MAX`. This branch should be stripped at compile time.
        let cap = if mem::size_of::<T>() == 0 { !0 } else { 0 };

        // `NonNull::dangling()` doubles as "unallocated" and "zero-sized allocation".
        RawVec {
            ptr: NonNull::dangling(),
            cap,
            _marker: PhantomData,
        }
    }

    fn grow(&mut self) {
        // Since we set the capacity to `usize::MAX` when `T` has size 0,
        // getting to here necessarily means the Vec is overfull.
        assert!(mem::size_of::<T>() != 0, "capacity overflow");

        let (new_cap, new_layout) = if self.cap == 0 {
            (1, Layout::array::<T>(1).unwrap())
        } else {
            // This can't overflow because we ensure self.cap <= isize::MAX.
            let new_cap = 2 * self.cap;

            // `Layout::array` checks that the number of bytes is <= usize::MAX,
            // but this is redundant since old_layout.size() <= isize::MAX,
            // so the `unwrap` should never fail.
            let new_layout = Layout::array::<T>(new_cap).unwrap();
            (new_cap, new_layout)
        };

        // Ensure that the new allocation doesn't exceed `isize::MAX` bytes.
        assert!(
            new_layout.size() <= isize::MAX as usize,
            "Allocation too large"
        );

        let new_ptr = if self.cap == 0 {
            unsafe { alloc::alloc(new_layout) }
        } else {
            let old_layout = Layout::array::<T>(self.cap).unwrap();
            let old_ptr = self.ptr.as_ptr() as *mut u8;
            unsafe { alloc::realloc(old_ptr, old_layout, new_layout.size()) }
        };

        // If allocation fails, `new_ptr` will be null, in which case we abort.
        self.ptr = match NonNull::new(new_ptr as *mut T) {
            Some(p) => p,
            None => alloc::handle_alloc_error(new_layout),
        };
        self.cap = new_cap;
    }
}

impl<T> Drop for RawVec<T> {
    fn drop(&mut self) {
        let elem_size = mem::size_of::<T>();

        if self.cap != 0 && elem_size != 0 {
            unsafe {
                alloc::dealloc(
                    self.ptr.as_ptr() as *mut u8,
                    Layout::array::<T>(self.cap).unwrap(),
                );
            }
        }
    }
}
```

That's it. We support pushing and popping zero-sized types now.
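As a quick sanity check, here's a minimal sketch of the behavior this buys us.
It's written against the standard library's `Vec` rather than the one we're
building (an assumption we can lean on here, since std uses the same
dangling-pointer-plus-sentinel-capacity strategy for ZSTs), so it compiles and
runs as-is:

```rust
fn main() {
    let mut v: Vec<()> = Vec::new();

    // A ZST Vec starts at the sentinel capacity and never allocates.
    assert_eq!(v.capacity(), usize::MAX);

    for _ in 0..1000 {
        v.push(()); // no allocator calls; effectively just `len += 1`
    }
    assert_eq!(v.len(), 1000);
    assert_eq!(v.pop(), Some(()));
}
```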
Our iterators (that aren't provided by slice `Deref`) are still busted, though.

## Iterating Zero-Sized Types

Zero-sized offsets are no-ops. This means that our current design will always
initialize `start` and `end` as the same value, and our iterators will yield
nothing. The current solution to this is to cast the pointers to integers,
increment, and then cast them back:

<!-- ignore: simplified code -->
```rust,ignore
impl<T> RawValIter<T> {
    unsafe fn new(slice: &[T]) -> Self {
        RawValIter {
            start: slice.as_ptr(),
            end: if mem::size_of::<T>() == 0 {
                ((slice.as_ptr() as usize) + slice.len()) as *const _
            } else if slice.len() == 0 {
                slice.as_ptr()
            } else {
                slice.as_ptr().add(slice.len())
            },
        }
    }
}
```

Now we have a different bug. Instead of our iterators not running at all, our
iterators now run *forever*. We need to do the same trick in our iterator
impls. Also, our `size_hint` computation code will divide by 0 for ZSTs. Since
we'll basically be treating the two pointers as if they point to bytes, we'll
just map a size of 0 to a divisor of 1. Here's what `next` will be:

<!-- ignore: simplified code -->
```rust,ignore
fn next(&mut self) -> Option<T> {
    if self.start == self.end {
        None
    } else {
        unsafe {
            let result = ptr::read(self.start);
            self.start = if mem::size_of::<T>() == 0 {
                (self.start as usize + 1) as *const _
            } else {
                self.start.offset(1)
            };
            Some(result)
        }
    }
}
```

Do you see the "bug"? No one else did! The original author only noticed the
problem when linking to this page years later. This code is kind of dubious
because abusing the iterator pointers to be *counters* makes them unaligned!
Our *one job* when using ZSTs is to keep pointers aligned! *forehead slap*

Raw pointers don't need to be aligned at all times, so the basic trick of
using pointers as counters is *fine*, but they *should* definitely be aligned
when passed to `ptr::read`! This is *possibly* needless pedantry
because `ptr::read` is a no-op for a ZST, but let's be a *little* more
responsible and read from `NonNull::dangling` on the ZST path.

(Alternatively you could call `read_unaligned` on the ZST path. Either is fine,
because either way we're making up a value from nothing and it all compiles
to doing nothing.)
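To see why the dangling read is sound, here's a tiny self-contained sketch
(using `()` as a stand-in for any ZST): there are no bytes to load, so reading
from a well-aligned, non-null dangling pointer just conjures the type's single
value out of thin air:

```rust
use std::ptr::{self, NonNull};

fn main() {
    // For a ZST, `ptr::read` loads zero bytes, so a dangling (but aligned,
    // non-null) pointer is a perfectly fine source.
    let unit: () = unsafe { ptr::read(NonNull::<()>::dangling().as_ptr()) };
    let _ = unit;
}
```

With that settled, here are the fixed iterator impls: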
<!-- ignore: simplified code -->
```rust,ignore
impl<T> Iterator for RawValIter<T> {
    type Item = T;
    fn next(&mut self) -> Option<T> {
        if self.start == self.end {
            None
        } else {
            unsafe {
                if mem::size_of::<T>() == 0 {
                    self.start = (self.start as usize + 1) as *const _;
                    Some(ptr::read(NonNull::<T>::dangling().as_ptr()))
                } else {
                    let old_ptr = self.start;
                    self.start = self.start.offset(1);
                    Some(ptr::read(old_ptr))
                }
            }
        }
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let elem_size = mem::size_of::<T>();
        let len = (self.end as usize - self.start as usize)
            / if elem_size == 0 { 1 } else { elem_size };
        (len, Some(len))
    }
}

impl<T> DoubleEndedIterator for RawValIter<T> {
    fn next_back(&mut self) -> Option<T> {
        if self.start == self.end {
            None
        } else {
            unsafe {
                if mem::size_of::<T>() == 0 {
                    self.end = (self.end as usize - 1) as *const _;
                    Some(ptr::read(NonNull::<T>::dangling().as_ptr()))
                } else {
                    self.end = self.end.offset(-1);
                    Some(ptr::read(self.end))
                }
            }
        }
    }
}
```

And that's it. Iteration works!
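As a final end-to-end check, here's a short sketch exercising `next`,
`next_back`, and `size_hint` over a ZST. It again leans on std's `Vec` (whose
`IntoIter` uses the same counter trick we just implemented), so it runs as-is:

```rust
fn main() {
    // Borrowed iteration over three units.
    let v = vec![(); 3];
    assert_eq!(v.iter().count(), 3);

    // Consuming iteration exercises next/next_back/size_hint for ZSTs.
    let mut it = vec![(); 5].into_iter();
    assert_eq!(it.size_hint(), (5, Some(5)));
    assert_eq!(it.next(), Some(()));
    assert_eq!(it.next_back(), Some(()));
    assert_eq!(it.size_hint(), (3, Some(3)));
}
```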