//! Defining custom `Reader`s quickly. use alloc::borrow::Cow; use alloc::rc::Rc; use alloc::string::String; use alloc::sync::Arc; use core::fmt::Debug; use core::ops::{Deref, Index, Range, RangeFrom, RangeTo}; use core::slice; use core::str; use stable_deref_trait::CloneStableDeref; use crate::endianity::Endianity; use crate::read::{Error, Reader, ReaderOffsetId, Result}; /// A reference counted, non-thread-safe slice of bytes and associated /// endianity. /// /// ``` /// # #[cfg(feature = "std")] { /// use std::rc::Rc; /// /// let buf = Rc::from(&[1, 2, 3, 4][..]); /// let reader = gimli::EndianRcSlice::new(buf, gimli::NativeEndian); /// # let _ = reader; /// # } /// ``` pub type EndianRcSlice = EndianReader>; /// An atomically reference counted, thread-safe slice of bytes and associated /// endianity. /// /// ``` /// # #[cfg(feature = "std")] { /// use std::sync::Arc; /// /// let buf = Arc::from(&[1, 2, 3, 4][..]); /// let reader = gimli::EndianArcSlice::new(buf, gimli::NativeEndian); /// # let _ = reader; /// # } /// ``` pub type EndianArcSlice = EndianReader>; /// An easy way to define a custom `Reader` implementation with a reference to a /// generic buffer of bytes and an associated endianity. /// /// Note that the whole original buffer is kept alive in memory even if there is /// only one reader that references only a handful of bytes from that original /// buffer. That is, `EndianReader` will not do any copying, moving, or /// compacting in order to free up unused regions of the original buffer. If you /// require this kind of behavior, it is up to you to implement `Reader` /// directly by-hand. /// /// # Example /// /// Say you have an `mmap`ed file that you want to serve as a `gimli::Reader`. /// You can wrap that `mmap`ed file up in a `MmapFile` type and use /// `EndianReader>` or `EndianReader>` as readers as /// long as `MmapFile` dereferences to the underlying `[u8]` data. /// /// ``` /// use std::io; /// use std::ops::Deref; /// use std::path::Path; /// use std::slice; /// use std::sync::Arc; /// /// /// A type that represents an `mmap`ed file. /// #[derive(Debug)] /// pub struct MmapFile { /// ptr: *const u8, /// len: usize, /// } /// /// impl MmapFile { /// pub fn new(path: &Path) -> io::Result { /// // Call `mmap` and check for errors and all that... /// # unimplemented!() /// } /// } /// /// impl Drop for MmapFile { /// fn drop(&mut self) { /// // Call `munmap` to clean up after ourselves... /// # unimplemented!() /// } /// } /// /// // And `MmapFile` can deref to a slice of the `mmap`ed region of memory. /// impl Deref for MmapFile { /// type Target = [u8]; /// fn deref(&self) -> &[u8] { /// unsafe { /// slice::from_raw_parts(self.ptr, self.len) /// } /// } /// } /// /// /// A type that represents a shared `mmap`ed file. /// #[derive(Debug, Clone)] /// pub struct ArcMmapFile(Arc); /// /// // And `ArcMmapFile` can deref to a slice of the `mmap`ed region of memory. /// impl Deref for ArcMmapFile { /// type Target = [u8]; /// fn deref(&self) -> &[u8] { /// &self.0 /// } /// } /// /// // These are both valid for any `Rc` or `Arc`. /// unsafe impl gimli::StableDeref for ArcMmapFile {} /// unsafe impl gimli::CloneStableDeref for ArcMmapFile {} /// /// /// A `gimli::Reader` that is backed by an `mmap`ed file! /// pub type MmapFileReader = gimli::EndianReader; /// # fn test(_: &MmapFileReader) { } /// ``` #[derive(Debug, Clone, Copy, Hash)] pub struct EndianReader where Endian: Endianity, T: CloneStableDeref + Debug, { range: SubRange, endian: Endian, } impl PartialEq> for EndianReader where Endian: Endianity, T1: CloneStableDeref + Debug, T2: CloneStableDeref + Debug, { fn eq(&self, rhs: &EndianReader) -> bool { self.bytes() == rhs.bytes() } } impl Eq for EndianReader where Endian: Endianity, T: CloneStableDeref + Debug, { } // This is separated out from `EndianReader` so that we can avoid running afoul // of borrowck. We need to `read_slice(&mut self, ...) -> &[u8]` and then call // `self.endian.read_whatever` on the result. The problem is that the returned // slice keeps the `&mut self` borrow active, so we wouldn't be able to access // `self.endian`. Splitting the sub-range out from the endian lets us work // around this, making it so that only the `self.range` borrow is held active, // not all of `self`. // // This also serves to encapsulate the unsafe code concerning `CloneStableDeref`. // The `bytes` member is held so that the bytes live long enough, and the // `CloneStableDeref` ensures these bytes never move. The `ptr` and `len` // members point inside `bytes`, and are updated during read operations. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] struct SubRange where T: CloneStableDeref + Debug, { bytes: T, ptr: *const u8, len: usize, } unsafe impl Send for SubRange where T: CloneStableDeref + Debug + Send {} unsafe impl Sync for SubRange where T: CloneStableDeref + Debug + Sync {} impl SubRange where T: CloneStableDeref + Debug, { #[inline] fn new(bytes: T) -> Self { let ptr = bytes.as_ptr(); let len = bytes.len(); SubRange { bytes, ptr, len } } #[inline] fn bytes(&self) -> &[u8] { // Safe because `T` implements `CloneStableDeref`, `bytes` can't be modified, // and all operations that modify `ptr` and `len` ensure they stay in range. unsafe { slice::from_raw_parts(self.ptr, self.len) } } #[inline] fn len(&self) -> usize { self.len } #[inline] fn truncate(&mut self, len: usize) { assert!(len <= self.len); self.len = len; } #[inline] fn skip(&mut self, len: usize) { assert!(len <= self.len); self.ptr = unsafe { self.ptr.add(len) }; self.len -= len; } #[inline] fn read_slice(&mut self, len: usize) -> Option<&[u8]> { if self.len() < len { None } else { // Same as for `bytes()`. let bytes = unsafe { slice::from_raw_parts(self.ptr, len) }; self.skip(len); Some(bytes) } } } impl EndianReader where Endian: Endianity, T: CloneStableDeref + Debug, { /// Construct a new `EndianReader` with the given bytes. #[inline] pub fn new(bytes: T, endian: Endian) -> EndianReader { EndianReader { range: SubRange::new(bytes), endian, } } /// Return a reference to the raw bytes underlying this reader. #[inline] pub fn bytes(&self) -> &[u8] { self.range.bytes() } } /// # Range Methods /// /// Unfortunately, `std::ops::Index` *must* return a reference, so we can't /// implement `Index>` to return a new `EndianReader` the way we /// would like to. Instead, we abandon fancy indexing operators and have these /// plain old methods. impl EndianReader where Endian: Endianity, T: CloneStableDeref + Debug, { /// Take the given `start..end` range of the underlying buffer and return a /// new `EndianReader`. /// /// ``` /// # #[cfg(feature = "std")] { /// use gimli::{EndianReader, LittleEndian}; /// use std::sync::Arc; /// /// let buf = Arc::<[u8]>::from(&[0x01, 0x02, 0x03, 0x04][..]); /// let reader = EndianReader::new(buf.clone(), LittleEndian); /// assert_eq!(reader.range(1..3), /// EndianReader::new(&buf[1..3], LittleEndian)); /// # } /// ``` /// /// # Panics /// /// Panics if the range is out of bounds. pub fn range(&self, idx: Range) -> EndianReader { let mut r = self.clone(); r.range.skip(idx.start); r.range.truncate(idx.len()); r } /// Take the given `start..` range of the underlying buffer and return a new /// `EndianReader`. /// /// ``` /// # #[cfg(feature = "std")] { /// use gimli::{EndianReader, LittleEndian}; /// use std::sync::Arc; /// /// let buf = Arc::<[u8]>::from(&[0x01, 0x02, 0x03, 0x04][..]); /// let reader = EndianReader::new(buf.clone(), LittleEndian); /// assert_eq!(reader.range_from(2..), /// EndianReader::new(&buf[2..], LittleEndian)); /// # } /// ``` /// /// # Panics /// /// Panics if the range is out of bounds. pub fn range_from(&self, idx: RangeFrom) -> EndianReader { let mut r = self.clone(); r.range.skip(idx.start); r } /// Take the given `..end` range of the underlying buffer and return a new /// `EndianReader`. /// /// ``` /// # #[cfg(feature = "std")] { /// use gimli::{EndianReader, LittleEndian}; /// use std::sync::Arc; /// /// let buf = Arc::<[u8]>::from(&[0x01, 0x02, 0x03, 0x04][..]); /// let reader = EndianReader::new(buf.clone(), LittleEndian); /// assert_eq!(reader.range_to(..3), /// EndianReader::new(&buf[..3], LittleEndian)); /// # } /// ``` /// /// # Panics /// /// Panics if the range is out of bounds. pub fn range_to(&self, idx: RangeTo) -> EndianReader { let mut r = self.clone(); r.range.truncate(idx.end); r } } impl Index for EndianReader where Endian: Endianity, T: CloneStableDeref + Debug, { type Output = u8; fn index(&self, idx: usize) -> &Self::Output { &self.bytes()[idx] } } impl Index> for EndianReader where Endian: Endianity, T: CloneStableDeref + Debug, { type Output = [u8]; fn index(&self, idx: RangeFrom) -> &Self::Output { &self.bytes()[idx] } } impl Deref for EndianReader where Endian: Endianity, T: CloneStableDeref + Debug, { type Target = [u8]; fn deref(&self) -> &Self::Target { self.bytes() } } impl Reader for EndianReader where Endian: Endianity, T: CloneStableDeref + Debug, { type Endian = Endian; type Offset = usize; #[inline] fn endian(&self) -> Endian { self.endian } #[inline] fn len(&self) -> usize { self.range.len() } #[inline] fn empty(&mut self) { self.range.truncate(0); } #[inline] fn truncate(&mut self, len: usize) -> Result<()> { if self.len() < len { Err(Error::UnexpectedEof(self.offset_id())) } else { self.range.truncate(len); Ok(()) } } #[inline] fn offset_from(&self, base: &EndianReader) -> usize { let base_ptr = base.bytes().as_ptr() as *const u8 as usize; let ptr = self.bytes().as_ptr() as *const u8 as usize; debug_assert!(base_ptr <= ptr); debug_assert!(ptr + self.bytes().len() <= base_ptr + base.bytes().len()); ptr - base_ptr } #[inline] fn offset_id(&self) -> ReaderOffsetId { ReaderOffsetId(self.bytes().as_ptr() as u64) } #[inline] fn lookup_offset_id(&self, id: ReaderOffsetId) -> Option { let id = id.0; let self_id = self.bytes().as_ptr() as u64; let self_len = self.bytes().len() as u64; if id >= self_id && id <= self_id + self_len { Some((id - self_id) as usize) } else { None } } #[inline] fn find(&self, byte: u8) -> Result { self.bytes() .iter() .position(|x| *x == byte) .ok_or_else(|| Error::UnexpectedEof(self.offset_id())) } #[inline] fn skip(&mut self, len: usize) -> Result<()> { if self.len() < len { Err(Error::UnexpectedEof(self.offset_id())) } else { self.range.skip(len); Ok(()) } } #[inline] fn split(&mut self, len: usize) -> Result { if self.len() < len { Err(Error::UnexpectedEof(self.offset_id())) } else { let mut r = self.clone(); r.range.truncate(len); self.range.skip(len); Ok(r) } } #[inline] fn to_slice(&self) -> Result> { Ok(self.bytes().into()) } #[inline] fn to_string(&self) -> Result> { match str::from_utf8(self.bytes()) { Ok(s) => Ok(s.into()), _ => Err(Error::BadUtf8), } } #[inline] fn to_string_lossy(&self) -> Result> { Ok(String::from_utf8_lossy(self.bytes())) } #[inline] fn read_slice(&mut self, buf: &mut [u8]) -> Result<()> { match self.range.read_slice(buf.len()) { Some(slice) => { buf.copy_from_slice(slice); Ok(()) } None => Err(Error::UnexpectedEof(self.offset_id())), } } } #[cfg(test)] mod tests { use super::*; use crate::endianity::NativeEndian; use crate::read::Reader; fn native_reader + Debug>( bytes: T, ) -> EndianReader { EndianReader::new(bytes, NativeEndian) } const BUF: &[u8] = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]; #[test] fn test_reader_split() { let mut reader = native_reader(BUF); let left = reader.split(3).unwrap(); assert_eq!(left, native_reader(&BUF[..3])); assert_eq!(reader, native_reader(&BUF[3..])); } #[test] fn test_reader_split_out_of_bounds() { let mut reader = native_reader(BUF); assert!(reader.split(30).is_err()); } #[test] fn bytes_and_len_and_range_and_eq() { let reader = native_reader(BUF); assert_eq!(reader.len(), BUF.len()); assert_eq!(reader.bytes(), BUF); assert_eq!(reader, native_reader(BUF)); let range = reader.range(2..8); let buf_range = &BUF[2..8]; assert_eq!(range.len(), buf_range.len()); assert_eq!(range.bytes(), buf_range); assert_ne!(range, native_reader(BUF)); assert_eq!(range, native_reader(buf_range)); let range_from = range.range_from(1..); let buf_range_from = &buf_range[1..]; assert_eq!(range_from.len(), buf_range_from.len()); assert_eq!(range_from.bytes(), buf_range_from); assert_ne!(range_from, native_reader(BUF)); assert_eq!(range_from, native_reader(buf_range_from)); let range_to = range_from.range_to(..4); let buf_range_to = &buf_range_from[..4]; assert_eq!(range_to.len(), buf_range_to.len()); assert_eq!(range_to.bytes(), buf_range_to); assert_ne!(range_to, native_reader(BUF)); assert_eq!(range_to, native_reader(buf_range_to)); } #[test] fn find() { let mut reader = native_reader(BUF); reader.skip(2).unwrap(); assert_eq!( reader.find(5), Ok(BUF[2..].iter().position(|x| *x == 5).unwrap()) ); } #[test] fn indexing() { let mut reader = native_reader(BUF); reader.skip(2).unwrap(); assert_eq!(reader[0], BUF[2]); } #[test] #[should_panic] fn indexing_out_of_bounds() { let mut reader = native_reader(BUF); reader.skip(2).unwrap(); let _ = reader[900]; } #[test] fn endian() { let reader = native_reader(BUF); assert_eq!(reader.endian(), NativeEndian); } #[test] fn empty() { let mut reader = native_reader(BUF); assert!(!reader.is_empty()); reader.empty(); assert!(reader.is_empty()); assert!(reader.bytes().is_empty()); } #[test] fn truncate() { let reader = native_reader(BUF); let mut reader = reader.range(2..8); reader.truncate(2).unwrap(); assert_eq!(reader.bytes(), &BUF[2..4]); } #[test] fn offset_from() { let reader = native_reader(BUF); let sub = reader.range(2..8); assert_eq!(sub.offset_from(&reader), 2); } #[test] fn skip() { let mut reader = native_reader(BUF); reader.skip(2).unwrap(); assert_eq!(reader.bytes(), &BUF[2..]); } #[test] fn to_slice() { assert_eq!( native_reader(BUF).range(2..5).to_slice(), Ok(Cow::from(&BUF[2..5])) ); } #[test] fn to_string_ok() { let buf = b"hello, world!"; let reader = native_reader(&buf[..]); let reader = reader.range_from(7..); assert_eq!(reader.to_string(), Ok(Cow::from("world!"))); } // The rocket emoji (🚀 = [0xf0, 0x9f, 0x9a, 0x80]) but rotated left by one // to make it invalid UTF-8. const BAD_UTF8: &[u8] = &[0x9f, 0x9a, 0x80, 0xf0]; #[test] fn to_string_err() { let reader = native_reader(BAD_UTF8); assert!(reader.to_string().is_err()); } #[test] fn to_string_lossy() { let reader = native_reader(BAD_UTF8); assert_eq!(reader.to_string_lossy(), Ok(Cow::from("����"))); } #[test] fn read_u8_array() { let mut reader = native_reader(BAD_UTF8); reader.skip(1).unwrap(); let arr: [u8; 2] = reader.read_u8_array().unwrap(); assert_eq!(arr, &BAD_UTF8[1..3]); assert_eq!(reader.bytes(), &BAD_UTF8[3..]); } }