summaryrefslogtreecommitdiffstats
path: root/third_party/rust/minidump-writer/src/mac/task_dumper.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/minidump-writer/src/mac/task_dumper.rs')
-rw-r--r--third_party/rust/minidump-writer/src/mac/task_dumper.rs462
1 files changed, 462 insertions, 0 deletions
diff --git a/third_party/rust/minidump-writer/src/mac/task_dumper.rs b/third_party/rust/minidump-writer/src/mac/task_dumper.rs
new file mode 100644
index 0000000000..013d432d26
--- /dev/null
+++ b/third_party/rust/minidump-writer/src/mac/task_dumper.rs
@@ -0,0 +1,462 @@
+use crate::mac::mach;
+use mach2::mach_types as mt;
+use thiserror::Error;
+
+#[derive(Error, Debug)]
+pub enum TaskDumpError {
+ #[error("kernel error {syscall} {error})")]
+ Kernel {
+ syscall: &'static str,
+ error: mach::KernelError,
+ },
+ #[error("detected an invalid mach image header")]
+ InvalidMachHeader,
+ #[error(transparent)]
+ NonUtf8String(#[from] std::string::FromUtf8Error),
+ #[error("unable to find the main executable image for the process")]
+ NoExecutableImage,
+ #[error("expected load command {name}({id:?}) was not found for an image")]
+ MissingLoadCommand {
+ name: &'static str,
+ id: mach::LoadCommandKind,
+ },
+}
+
+/// Wraps a mach call in a Result
+macro_rules! mach_call {
+ ($call:expr) => {{
+ // SAFETY: syscall
+ let kr = unsafe { $call };
+ if kr == mach::KERN_SUCCESS {
+ Ok(())
+ } else {
+ // This is ugly, improvements to the macro welcome!
+ let mut syscall = stringify!($call);
+ if let Some(i) = syscall.find('(') {
+ syscall = &syscall[..i];
+ }
+ Err(TaskDumpError::Kernel {
+ syscall,
+ error: kr.into(),
+ })
+ }
+ }};
+}
+
+/// `dyld_all_image_infos` from <usr/include/mach-o/dyld_images.h>
+///
+/// This struct is truncated as we only need a couple of fields at the beginning
+/// of the struct
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct AllImagesInfo {
+ // VERSION 1
+ pub version: u32,
+ /// The number of [`ImageInfo`] structs at that following address
+ info_array_count: u32,
+ /// The address in the process where the array of [`ImageInfo`] structs is
+ info_array_addr: u64,
+ /// A function pointer, unused
+ _notification: u64,
+ /// Unused
+ _process_detached_from_shared_region: bool,
+ // VERSION 2
+ lib_system_initialized: bool,
+ // Note that crashpad adds a 32-bit int here to get proper alignment when
+ // building on 32-bit targets...but we explicitly don't care about 32-bit
+ // targets since Apple doesn't
+ pub dyld_image_load_address: u64,
+}
+
+/// `dyld_image_info` from <usr/include/mach-o/dyld_images.h>
+#[repr(C)]
+#[derive(Debug, Clone, Copy)]
+pub struct ImageInfo {
+ /// The address in the process where the image is loaded
+ pub load_address: u64,
+ /// The address in the process where the image's file path can be read
+ pub file_path: u64,
+ /// Timestamp for when the image's file was last modified
+ pub file_mod_date: u64,
+}
+
+impl PartialEq for ImageInfo {
+ fn eq(&self, o: &Self) -> bool {
+ self.load_address == o.load_address
+ }
+}
+
+impl Eq for ImageInfo {}
+
+impl Ord for ImageInfo {
+ fn cmp(&self, o: &Self) -> std::cmp::Ordering {
+ self.load_address.cmp(&o.load_address)
+ }
+}
+
+impl PartialOrd for ImageInfo {
+ fn partial_cmp(&self, o: &Self) -> Option<std::cmp::Ordering> {
+ Some(self.cmp(o))
+ }
+}
+
+/// Describes a region of virtual memory
+pub struct VMRegionInfo {
+ pub info: mach::vm_region_submap_info_64,
+ pub range: std::ops::Range<u64>,
+}
+
+/// Similarly to PtraceDumper for Linux, this provides access to information
+/// for a task (MacOS process)
+pub struct TaskDumper {
+ task: mt::task_t,
+ page_size: i64,
+}
+
+impl TaskDumper {
+ /// Constructs a [`TaskDumper`] for the specified task
+ pub fn new(task: mt::task_t) -> Self {
+ Self {
+ task,
+ // SAFETY: syscall
+ page_size: unsafe { libc::sysconf(libc::_SC_PAGESIZE) } as i64,
+ }
+ }
+
+ /// Reads a block of memory from the task
+ ///
+ /// # Errors
+ ///
+ /// The syscall to read the task's memory fails for some reason, eg bad address.
+ pub fn read_task_memory<T>(&self, address: u64, count: usize) -> Result<Vec<T>, TaskDumpError>
+ where
+ T: Sized + Clone,
+ {
+ let length = (count * std::mem::size_of::<T>()) as u64;
+
+ // use the negative of the page size for the mask to find the page address
+ let page_address = address & (-self.page_size as u64);
+ let last_page_address =
+ (address + length + (self.page_size - 1) as u64) & (-self.page_size as u64);
+
+ let page_size = last_page_address - page_address;
+ let mut local_start = 0;
+ let mut local_length = 0;
+
+ mach_call!(mach::mach_vm_read(
+ self.task,
+ page_address,
+ page_size,
+ &mut local_start,
+ &mut local_length
+ ))?;
+
+ let mut buffer = Vec::with_capacity(count);
+
+ // SAFETY: this is safe as long as the kernel has not lied to us
+ let task_buffer = unsafe {
+ std::slice::from_raw_parts(
+ (local_start as *const u8)
+ .offset((address - page_address) as isize)
+ .cast(),
+ count,
+ )
+ };
+ buffer.extend_from_slice(task_buffer);
+
+ // Don't worry about the return here, if something goes wrong there's probably
+ // not much we can do about it, and we have what we want anyways
+ let _res = mach_call!(mach::mach_vm_deallocate(
+ mach::mach_task_self(),
+ local_start as u64, // vm_read returns a pointer, but vm_deallocate takes a integer address :-/
+ local_length as u64, // vm_read and vm_deallocate use different sizes :-/
+ ));
+
+ Ok(buffer)
+ }
+
+ /// Reads a null terminated string starting at the specified address. This
+ /// is a specialization of [`read_task_memory`] since strings can span VM
+ /// regions.
+ ///
+ /// If not specified, the string is capped at 8k which should never be close
+ /// to being hit in normal scenarios, at least for "system" strings, which is
+ /// all this interface is used to retrieve
+ ///
+ /// # Errors
+ ///
+ /// Fails if the address cannot be read for some reason, or the string is
+ /// not utf-8.
+ pub fn read_string(
+ &self,
+ addr: u64,
+ expected_size: Option<usize>,
+ ) -> Result<Option<String>, TaskDumpError> {
+ // The problem is we don't know how much to read until we know how long
+ // the string is. And we don't know how long the string is, until we've read
+ // the memory! So, we'll try to read kMaxStringLength bytes
+ // (or as many bytes as we can until we reach the end of the vm region).
+ let get_region_size = || -> Result<u64, TaskDumpError> {
+ let region = self.get_vm_region(addr)?;
+
+ let mut size_to_end = region.range.end - addr;
+
+ // If the remaining is less than 4k, check if the next region is
+ // contiguous, and extend the memory that could contain the string
+ // to include it
+ if size_to_end < 4 * 1024 {
+ let maybe_adjacent = self.get_vm_region(region.range.end)?;
+
+ if maybe_adjacent.range.start == region.range.end {
+ size_to_end += maybe_adjacent.range.end - maybe_adjacent.range.start;
+ }
+ }
+
+ Ok(size_to_end)
+ };
+
+ if let Ok(size_to_end) = get_region_size() {
+ let mut bytes = self.read_task_memory(
+ addr,
+ std::cmp::min(size_to_end as usize, expected_size.unwrap_or(8 * 1024)),
+ )?;
+
+ // Find the null terminator and truncate our string
+ if let Some(null_pos) = bytes.iter().position(|c| *c == 0) {
+ bytes.resize(null_pos, 0);
+ }
+
+ Ok(String::from_utf8(bytes).map(Some)?)
+ } else {
+ Ok(None)
+ }
+ }
+
+ /// Retrives information on the virtual memory region the specified address
+ /// is located within.
+ ///
+ /// # Errors
+ ///
+ /// The syscall to retrieve the VM region information fails for some reason,
+ /// eg. a bad address.
+ pub fn get_vm_region(&self, addr: u64) -> Result<VMRegionInfo, TaskDumpError> {
+ let mut region_base = addr;
+ let mut region_size = 0;
+ let mut nesting_level = 0;
+ let mut submap_info = std::mem::MaybeUninit::<mach::vm_region_submap_info_64>::uninit();
+
+ // <user/include/mach/vm_region.h>
+ const VM_REGION_SUBMAP_INFO_COUNT_64: u32 =
+ (std::mem::size_of::<mach::vm_region_submap_info_64>() / std::mem::size_of::<u32>())
+ as u32;
+
+ let mut info_count = VM_REGION_SUBMAP_INFO_COUNT_64;
+
+ mach_call!(mach::mach_vm_region_recurse(
+ self.task,
+ &mut region_base,
+ &mut region_size,
+ &mut nesting_level,
+ submap_info.as_mut_ptr().cast(),
+ &mut info_count,
+ ))?;
+
+ Ok(VMRegionInfo {
+ // SAFETY: this will be valid if the syscall succeeded
+ info: unsafe { submap_info.assume_init() },
+ range: region_base..region_base + region_size,
+ })
+ }
+
+ /// Retrieves the state of the specified thread. The state is an architecture
+ /// specific block of CPU context ie register state.
+ ///
+ /// # Errors
+ ///
+ /// The specified thread id is invalid, or the thread is in a task that is
+ /// compiled for a different architecture than this local task.
+ pub fn read_thread_state(&self, tid: u32) -> Result<mach::ThreadState, TaskDumpError> {
+ let mut thread_state = mach::ThreadState::default();
+
+ mach_call!(mach::thread_get_state(
+ tid,
+ mach::THREAD_STATE_FLAVOR as i32,
+ thread_state.state.as_mut_ptr(),
+ &mut thread_state.state_size,
+ ))?;
+
+ Ok(thread_state)
+ }
+
+ /// Reads the specified task information.
+ ///
+ /// # Errors
+ ///
+ /// The syscall to receive the task information failed for some reason, eg.
+ /// the specified type and the flavor are mismatched and considered invalid.
+ pub fn task_info<T: mach::TaskInfo>(&self) -> Result<T, TaskDumpError> {
+ let mut info = std::mem::MaybeUninit::<T>::uninit();
+ let mut count = (std::mem::size_of::<T>() / std::mem::size_of::<u32>()) as u32;
+
+ mach_call!(mach::task::task_info(
+ self.task,
+ T::FLAVOR,
+ info.as_mut_ptr().cast(),
+ &mut count
+ ))?;
+
+ // SAFETY: this will be initialized if the call succeeded
+ unsafe { Ok(info.assume_init()) }
+ }
+
+ /// Reads the specified task information.
+ ///
+ /// # Errors
+ ///
+ /// The syscall to receive the task information failed for some reason, eg.
+ /// the specified type and the flavor are mismatched and considered invalid,
+ /// or the thread no longer exists
+ pub fn thread_info<T: mach::ThreadInfo>(&self, tid: u32) -> Result<T, TaskDumpError> {
+ let mut thread_info = std::mem::MaybeUninit::<T>::uninit();
+ let mut count = (std::mem::size_of::<T>() / std::mem::size_of::<u32>()) as u32;
+
+ mach_call!(mach::thread_info(
+ tid,
+ T::FLAVOR,
+ thread_info.as_mut_ptr().cast(),
+ &mut count,
+ ))?;
+
+ // SAFETY: this will be initialized if the call succeeded
+ unsafe { Ok(thread_info.assume_init()) }
+ }
+
+ /// Retrieves all of the images loaded in the task.
+ ///
+ /// Note that there may be multiple images with the same load address.
+ ///
+ /// # Errors
+ ///
+ /// The syscall to retrieve the location of the loaded images fails, or
+ /// the syscall to read the loaded images from the process memory fails
+ pub fn read_images(&self) -> Result<(AllImagesInfo, Vec<ImageInfo>), TaskDumpError> {
+ impl mach::TaskInfo for mach::task_info::task_dyld_info {
+ const FLAVOR: u32 = mach::task_info::TASK_DYLD_INFO;
+ }
+
+ // Retrieve the address at which the list of loaded images is located
+ // within the task
+ let all_images_addr = {
+ let dyld_info = self.task_info::<mach::task_info::task_dyld_info>()?;
+ dyld_info.all_image_info_addr
+ };
+
+ // Here we make the assumption that dyld loaded at the same address in
+ // the crashed process vs. this one. This is an assumption made in
+ // "dyld_debug.c" and is said to be nearly always valid.
+ let dyld_all_info_buf =
+ self.read_task_memory::<u8>(all_images_addr, std::mem::size_of::<AllImagesInfo>())?;
+ // SAFETY: this is fine as long as the kernel isn't lying to us
+ let all_images_info: &AllImagesInfo = unsafe { &*(dyld_all_info_buf.as_ptr().cast()) };
+
+ let images = self.read_task_memory::<ImageInfo>(
+ all_images_info.info_array_addr,
+ all_images_info.info_array_count as usize,
+ )?;
+
+ Ok((*all_images_info, images))
+ }
+
+ /// Retrieves the main executable image for the task.
+ ///
+ /// Note that this method is currently only used for tests due to deficiencies
+ /// in `otool`
+ ///
+ /// # Errors
+ ///
+ /// Any of the errors that apply to [`Self::read_images`] apply here, in
+ /// addition to not being able to find the main executable image
+ pub fn read_executable_image(&self) -> Result<ImageInfo, TaskDumpError> {
+ let (_, images) = self.read_images()?;
+
+ for img in images {
+ let mach_header = self.read_task_memory::<mach::MachHeader>(img.load_address, 1)?;
+
+ let header = &mach_header[0];
+
+ if header.magic != mach::MH_MAGIC_64 {
+ return Err(TaskDumpError::InvalidMachHeader);
+ }
+
+ if header.file_type == mach::MH_EXECUTE {
+ return Ok(img);
+ }
+ }
+
+ Err(TaskDumpError::NoExecutableImage)
+ }
+
+ /// Retrieves the load commands for the specified image
+ ///
+ /// # Errors
+ ///
+ /// We fail to read the image header for the specified image, the header we
+ /// read is determined to be invalid, or we fail to read the block of memory
+ /// containing the load commands themselves.
+ pub fn read_load_commands(&self, img: &ImageInfo) -> Result<mach::LoadCommands, TaskDumpError> {
+ let mach_header = self.read_task_memory::<mach::MachHeader>(img.load_address, 1)?;
+
+ let header = &mach_header[0];
+
+ if header.magic != mach::MH_MAGIC_64 {
+ return Err(TaskDumpError::InvalidMachHeader);
+ }
+
+ // Read the load commands which immediately follow the image header from
+ // the task memory. Note that load commands vary in size so we need to
+ // retrieve the memory as a raw byte buffer that we can then iterate
+ // through and step according to the size of each load command
+ let load_commands_buf = self.read_task_memory::<u8>(
+ img.load_address + std::mem::size_of::<mach::MachHeader>() as u64,
+ header.size_commands as usize,
+ )?;
+
+ Ok(mach::LoadCommands {
+ buffer: load_commands_buf,
+ count: header.num_commands,
+ })
+ }
+
+ /// Gets a list of all of the thread ids in the task
+ ///
+ /// # Errors
+ ///
+ /// The syscall to retrieve the list of threads fails
+ pub fn read_threads(&self) -> Result<&'static [u32], TaskDumpError> {
+ let mut threads = std::ptr::null_mut();
+ let mut thread_count = 0;
+
+ mach_call!(mach::task_threads(
+ self.task,
+ &mut threads,
+ &mut thread_count
+ ))?;
+
+ Ok(
+ // SAFETY: This should be valid if the call succeeded
+ unsafe { std::slice::from_raw_parts(threads, thread_count as usize) },
+ )
+ }
+
+ /// Retrieves the PID for the task
+ ///
+ /// # Errors
+ ///
+ /// Presumably the only way this would fail would be if the task we are
+ /// dumping disappears.
+ pub fn pid_for_task(&self) -> Result<i32, TaskDumpError> {
+ let mut pid = 0;
+ mach_call!(mach::pid_for_task(self.task, &mut pid))?;
+ Ok(pid)
+ }
+}