use crate::mac::mach; use mach2::mach_types as mt; use thiserror::Error; #[derive(Error, Debug)] pub enum TaskDumpError { #[error("kernel error {syscall} {error})")] Kernel { syscall: &'static str, error: mach::KernelError, }, #[error("detected an invalid mach image header")] InvalidMachHeader, #[error(transparent)] NonUtf8String(#[from] std::string::FromUtf8Error), #[error("unable to find the main executable image for the process")] NoExecutableImage, #[error("expected load command {name}({id:?}) was not found for an image")] MissingLoadCommand { name: &'static str, id: mach::LoadCommandKind, }, } /// Wraps a mach call in a Result macro_rules! mach_call { ($call:expr) => {{ // SAFETY: syscall let kr = unsafe { $call }; if kr == mach::KERN_SUCCESS { Ok(()) } else { // This is ugly, improvements to the macro welcome! let mut syscall = stringify!($call); if let Some(i) = syscall.find('(') { syscall = &syscall[..i]; } Err(TaskDumpError::Kernel { syscall, error: kr.into(), }) } }}; } /// `dyld_all_image_infos` from /// /// This struct is truncated as we only need a couple of fields at the beginning /// of the struct #[repr(C)] #[derive(Copy, Clone)] pub struct AllImagesInfo { // VERSION 1 pub version: u32, /// The number of [`ImageInfo`] structs at that following address info_array_count: u32, /// The address in the process where the array of [`ImageInfo`] structs is info_array_addr: u64, /// A function pointer, unused _notification: u64, /// Unused _process_detached_from_shared_region: bool, // VERSION 2 lib_system_initialized: bool, // Note that crashpad adds a 32-bit int here to get proper alignment when // building on 32-bit targets...but we explicitly don't care about 32-bit // targets since Apple doesn't pub dyld_image_load_address: u64, } /// `dyld_image_info` from #[repr(C)] #[derive(Debug, Clone, Copy)] pub struct ImageInfo { /// The address in the process where the image is loaded pub load_address: u64, /// The address in the process where the image's file path can be read pub file_path: u64, /// Timestamp for when the image's file was last modified pub file_mod_date: u64, } impl PartialEq for ImageInfo { fn eq(&self, o: &Self) -> bool { self.load_address == o.load_address } } impl Eq for ImageInfo {} impl Ord for ImageInfo { fn cmp(&self, o: &Self) -> std::cmp::Ordering { self.load_address.cmp(&o.load_address) } } impl PartialOrd for ImageInfo { fn partial_cmp(&self, o: &Self) -> Option { Some(self.cmp(o)) } } /// Describes a region of virtual memory pub struct VMRegionInfo { pub info: mach::vm_region_submap_info_64, pub range: std::ops::Range, } /// Similarly to PtraceDumper for Linux, this provides access to information /// for a task (MacOS process) pub struct TaskDumper { task: mt::task_t, page_size: i64, } impl TaskDumper { /// Constructs a [`TaskDumper`] for the specified task pub fn new(task: mt::task_t) -> Self { Self { task, // SAFETY: syscall page_size: unsafe { libc::sysconf(libc::_SC_PAGESIZE) } as i64, } } /// Reads a block of memory from the task /// /// # Errors /// /// The syscall to read the task's memory fails for some reason, eg bad address. pub fn read_task_memory(&self, address: u64, count: usize) -> Result, TaskDumpError> where T: Sized + Clone, { let length = (count * std::mem::size_of::()) as u64; // use the negative of the page size for the mask to find the page address let page_address = address & (-self.page_size as u64); let last_page_address = (address + length + (self.page_size - 1) as u64) & (-self.page_size as u64); let page_size = last_page_address - page_address; let mut local_start = 0; let mut local_length = 0; mach_call!(mach::mach_vm_read( self.task, page_address, page_size, &mut local_start, &mut local_length ))?; let mut buffer = Vec::with_capacity(count); // SAFETY: this is safe as long as the kernel has not lied to us let task_buffer = unsafe { std::slice::from_raw_parts( (local_start as *const u8) .offset((address - page_address) as isize) .cast(), count, ) }; buffer.extend_from_slice(task_buffer); // Don't worry about the return here, if something goes wrong there's probably // not much we can do about it, and we have what we want anyways let _res = mach_call!(mach::mach_vm_deallocate( mach::mach_task_self(), local_start as u64, // vm_read returns a pointer, but vm_deallocate takes a integer address :-/ local_length as u64, // vm_read and vm_deallocate use different sizes :-/ )); Ok(buffer) } /// Reads a null terminated string starting at the specified address. This /// is a specialization of [`read_task_memory`] since strings can span VM /// regions. /// /// If not specified, the string is capped at 8k which should never be close /// to being hit in normal scenarios, at least for "system" strings, which is /// all this interface is used to retrieve /// /// # Errors /// /// Fails if the address cannot be read for some reason, or the string is /// not utf-8. pub fn read_string( &self, addr: u64, expected_size: Option, ) -> Result, TaskDumpError> { // The problem is we don't know how much to read until we know how long // the string is. And we don't know how long the string is, until we've read // the memory! So, we'll try to read kMaxStringLength bytes // (or as many bytes as we can until we reach the end of the vm region). let get_region_size = || -> Result { let region = self.get_vm_region(addr)?; let mut size_to_end = region.range.end - addr; // If the remaining is less than 4k, check if the next region is // contiguous, and extend the memory that could contain the string // to include it if size_to_end < 4 * 1024 { let maybe_adjacent = self.get_vm_region(region.range.end)?; if maybe_adjacent.range.start == region.range.end { size_to_end += maybe_adjacent.range.end - maybe_adjacent.range.start; } } Ok(size_to_end) }; if let Ok(size_to_end) = get_region_size() { let mut bytes = self.read_task_memory( addr, std::cmp::min(size_to_end as usize, expected_size.unwrap_or(8 * 1024)), )?; // Find the null terminator and truncate our string if let Some(null_pos) = bytes.iter().position(|c| *c == 0) { bytes.resize(null_pos, 0); } Ok(String::from_utf8(bytes).map(Some)?) } else { Ok(None) } } /// Retrives information on the virtual memory region the specified address /// is located within. /// /// # Errors /// /// The syscall to retrieve the VM region information fails for some reason, /// eg. a bad address. pub fn get_vm_region(&self, addr: u64) -> Result { let mut region_base = addr; let mut region_size = 0; let mut nesting_level = 0; let mut submap_info = std::mem::MaybeUninit::::uninit(); // const VM_REGION_SUBMAP_INFO_COUNT_64: u32 = (std::mem::size_of::() / std::mem::size_of::()) as u32; let mut info_count = VM_REGION_SUBMAP_INFO_COUNT_64; mach_call!(mach::mach_vm_region_recurse( self.task, &mut region_base, &mut region_size, &mut nesting_level, submap_info.as_mut_ptr().cast(), &mut info_count, ))?; Ok(VMRegionInfo { // SAFETY: this will be valid if the syscall succeeded info: unsafe { submap_info.assume_init() }, range: region_base..region_base + region_size, }) } /// Retrieves the state of the specified thread. The state is an architecture /// specific block of CPU context ie register state. /// /// # Errors /// /// The specified thread id is invalid, or the thread is in a task that is /// compiled for a different architecture than this local task. pub fn read_thread_state(&self, tid: u32) -> Result { let mut thread_state = mach::ThreadState::default(); mach_call!(mach::thread_get_state( tid, mach::THREAD_STATE_FLAVOR as i32, thread_state.state.as_mut_ptr(), &mut thread_state.state_size, ))?; Ok(thread_state) } /// Reads the specified task information. /// /// # Errors /// /// The syscall to receive the task information failed for some reason, eg. /// the specified type and the flavor are mismatched and considered invalid. pub fn task_info(&self) -> Result { let mut info = std::mem::MaybeUninit::::uninit(); let mut count = (std::mem::size_of::() / std::mem::size_of::()) as u32; mach_call!(mach::task::task_info( self.task, T::FLAVOR, info.as_mut_ptr().cast(), &mut count ))?; // SAFETY: this will be initialized if the call succeeded unsafe { Ok(info.assume_init()) } } /// Reads the specified task information. /// /// # Errors /// /// The syscall to receive the task information failed for some reason, eg. /// the specified type and the flavor are mismatched and considered invalid, /// or the thread no longer exists pub fn thread_info(&self, tid: u32) -> Result { let mut thread_info = std::mem::MaybeUninit::::uninit(); let mut count = (std::mem::size_of::() / std::mem::size_of::()) as u32; mach_call!(mach::thread_info( tid, T::FLAVOR, thread_info.as_mut_ptr().cast(), &mut count, ))?; // SAFETY: this will be initialized if the call succeeded unsafe { Ok(thread_info.assume_init()) } } /// Retrieves all of the images loaded in the task. /// /// Note that there may be multiple images with the same load address. /// /// # Errors /// /// The syscall to retrieve the location of the loaded images fails, or /// the syscall to read the loaded images from the process memory fails pub fn read_images(&self) -> Result<(AllImagesInfo, Vec), TaskDumpError> { impl mach::TaskInfo for mach::task_info::task_dyld_info { const FLAVOR: u32 = mach::task_info::TASK_DYLD_INFO; } // Retrieve the address at which the list of loaded images is located // within the task let all_images_addr = { let dyld_info = self.task_info::()?; dyld_info.all_image_info_addr }; // Here we make the assumption that dyld loaded at the same address in // the crashed process vs. this one. This is an assumption made in // "dyld_debug.c" and is said to be nearly always valid. let dyld_all_info_buf = self.read_task_memory::(all_images_addr, std::mem::size_of::())?; // SAFETY: this is fine as long as the kernel isn't lying to us let all_images_info: &AllImagesInfo = unsafe { &*(dyld_all_info_buf.as_ptr().cast()) }; let images = self.read_task_memory::( all_images_info.info_array_addr, all_images_info.info_array_count as usize, )?; Ok((*all_images_info, images)) } /// Retrieves the main executable image for the task. /// /// Note that this method is currently only used for tests due to deficiencies /// in `otool` /// /// # Errors /// /// Any of the errors that apply to [`Self::read_images`] apply here, in /// addition to not being able to find the main executable image pub fn read_executable_image(&self) -> Result { let (_, images) = self.read_images()?; for img in images { let mach_header = self.read_task_memory::(img.load_address, 1)?; let header = &mach_header[0]; if header.magic != mach::MH_MAGIC_64 { return Err(TaskDumpError::InvalidMachHeader); } if header.file_type == mach::MH_EXECUTE { return Ok(img); } } Err(TaskDumpError::NoExecutableImage) } /// Retrieves the load commands for the specified image /// /// # Errors /// /// We fail to read the image header for the specified image, the header we /// read is determined to be invalid, or we fail to read the block of memory /// containing the load commands themselves. pub fn read_load_commands(&self, img: &ImageInfo) -> Result { let mach_header = self.read_task_memory::(img.load_address, 1)?; let header = &mach_header[0]; if header.magic != mach::MH_MAGIC_64 { return Err(TaskDumpError::InvalidMachHeader); } // Read the load commands which immediately follow the image header from // the task memory. Note that load commands vary in size so we need to // retrieve the memory as a raw byte buffer that we can then iterate // through and step according to the size of each load command let load_commands_buf = self.read_task_memory::( img.load_address + std::mem::size_of::() as u64, header.size_commands as usize, )?; Ok(mach::LoadCommands { buffer: load_commands_buf, count: header.num_commands, }) } /// Gets a list of all of the thread ids in the task /// /// # Errors /// /// The syscall to retrieve the list of threads fails pub fn read_threads(&self) -> Result<&'static [u32], TaskDumpError> { let mut threads = std::ptr::null_mut(); let mut thread_count = 0; mach_call!(mach::task_threads( self.task, &mut threads, &mut thread_count ))?; Ok( // SAFETY: This should be valid if the call succeeded unsafe { std::slice::from_raw_parts(threads, thread_count as usize) }, ) } /// Retrieves the PID for the task /// /// # Errors /// /// Presumably the only way this would fail would be if the task we are /// dumping disappears. pub fn pid_for_task(&self) -> Result { let mut pid = 0; mach_call!(mach::pid_for_task(self.task, &mut pid))?; Ok(pid) } }