diff options
Diffstat (limited to 'third_party/rust/minidump-writer/src/mac')
13 files changed, 2721 insertions, 0 deletions
diff --git a/third_party/rust/minidump-writer/src/mac/errors.rs b/third_party/rust/minidump-writer/src/mac/errors.rs new file mode 100644 index 0000000000..96ddb88cad --- /dev/null +++ b/third_party/rust/minidump-writer/src/mac/errors.rs @@ -0,0 +1,13 @@ +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum WriterError { + #[error(transparent)] + TaskDumpError(#[from] crate::mac::task_dumper::TaskDumpError), + #[error("Failed to write to memory")] + MemoryWriterError(#[from] crate::mem_writer::MemoryWriterError), + #[error("Failed to write to file")] + FileWriterError(#[from] crate::dir_section::FileWriterError), + #[error("Attempted to write an exception stream with no crash context")] + NoCrashContext, +} diff --git a/third_party/rust/minidump-writer/src/mac/mach.rs b/third_party/rust/minidump-writer/src/mac/mach.rs new file mode 100644 index 0000000000..f95211dc64 --- /dev/null +++ b/third_party/rust/minidump-writer/src/mac/mach.rs @@ -0,0 +1,670 @@ +//! Contains various helpers to improve and expand on the bindings provided +//! by `mach2` + +// Just exports all of the mach functions we use into a flat list +pub use mach2::{ + kern_return::{kern_return_t, KERN_SUCCESS}, + port::mach_port_name_t, + task::{self, task_threads}, + task_info, + thread_act::thread_get_state, + traps::mach_task_self, + vm::{mach_vm_deallocate, mach_vm_read, mach_vm_region_recurse}, + vm_region::vm_region_submap_info_64, +}; + +/// A Mach kernel error. +/// +/// See <usr/include/mach/kern_return.h>. 
+#[derive(thiserror::Error, Debug)] +pub enum KernelError { + #[error("specified address is not currently valid")] + InvalidAddress = 1, + #[error("specified memory is valid, but does not permit the required forms of access")] + ProtectionFailure = 2, + #[error("the address range specified is already in use, or no address range of the size specified could be found")] + NoSpace = 3, + #[error("the function requested was not applicable to this type of argument, or an argument is invalid")] + InvalidArgument = 4, + #[error("the function could not be performed")] + Failure = 5, + #[error("system resource could not be allocated to fulfill this request")] + ResourceShortage = 6, + #[error("the task in question does not hold receive rights for the port argument")] + NotReceiver = 7, + #[error("bogus access restriction")] + NoAccess = 8, + #[error( + "during a page fault, the target address refers to a memory object that has been destroyed" + )] + MemoryFailure = 9, + #[error( + "during a page fault, the memory object indicated that the data could not be returned" + )] + MemoryError = 10, + #[error("the receive right is already a member of the portset")] + AlreadyInSet = 11, + #[error("the receive right is not a member of a port set")] + NotInSet = 12, + #[error("the name already denotes a right in the task")] + NameExists = 13, + #[error("the operation was aborted")] + Aborted = 14, + #[error("the name doesn't denote a right in the task")] + InvalidName = 15, + #[error("target task isn't an active task")] + InvalidTask = 16, + #[error("the name denotes a right, but not an appropriate right")] + InvalidRight = 17, + #[error("a blatant range error")] + InvalidValue = 18, + #[error("operation would overflow limit on user-references")] + UserRefsOverflow = 19, + #[error("the supplied port capability is improper")] + InvalidCapability = 20, + #[error("the task already has send or receive rights for the port under another name")] + RightExists = 21, + #[error("target host isn't 
actually a host")] + InvalidHost = 22, + #[error("an attempt was made to supply 'precious' data for memory that is already present in a memory object")] + MemoryPresent = 23, + // These 2 are errors which should only ever be seen by the kernel itself + //MemoryDataMoved = 24, + //MemoryRestartCopy = 25, + #[error("an argument applied to assert processor set privilege was not a processor set control port")] + InvalidProcessorSet = 26, + #[error("the specified scheduling attributes exceed the thread's limits")] + PolicyLimit = 27, + #[error("the specified scheduling policy is not currently enabled for the processor set")] + InvalidPolicy = 28, + #[error("the external memory manager failed to initialize the memory object")] + InvalidObject = 29, + #[error( + "a thread is attempting to wait for an event for which there is already a waiting thread" + )] + AlreadyWaiting = 30, + #[error("an attempt was made to destroy the default processor set")] + DefaultSet = 31, + #[error("an attempt was made to fetch an exception port that is protected, or to abort a thread while processing a protected exception")] + ExceptionProtected = 32, + #[error("a ledger was required but not supplied")] + InvalidLedger = 33, + #[error("the port was not a memory cache control port")] + InvalidMemoryControl = 34, + #[error("an argument supplied to assert security privilege was not a host security port")] + InvalidSecurity = 35, + #[error("thread_depress_abort was called on a thread which was not currently depressed")] + NotDepressed = 36, + #[error("object has been terminated and is no longer available")] + Terminated = 37, + #[error("lock set has been destroyed and is no longer available")] + LockSetDestroyed = 38, + #[error("the thread holding the lock terminated before releasing the lock")] + LockUnstable = 39, + #[error("the lock is already owned by another thread")] + LockOwned = 40, + #[error("the lock is already owned by the calling thread")] + LockOwnedSelf = 41, + #[error("semaphore has 
been destroyed and is no longer available")] + SemaphoreDestroyed = 42, + #[error("return from RPC indicating the target server was terminated before it successfully replied")] + RpcServerTerminated = 43, + #[error("terminate an orphaned activation")] + RpcTerminateOrphan = 44, + #[error("allow an orphaned activation to continue executing")] + RpcContinueOrphan = 45, + #[error("empty thread activation (No thread linked to it)")] + NotSupported = 46, + #[error("remote node down or inaccessible")] + NodeDown = 47, + #[error("a signalled thread was not actually waiting")] + NotWaiting = 48, + #[error("some thread-oriented operation (semaphore_wait) timed out")] + OperationTimedOut = 49, + #[error("during a page fault, indicates that the page was rejected as a result of a signature check")] + CodesignError = 50, + #[error("the requested property cannot be changed at this time")] + PoicyStatic = 51, + #[error("the provided buffer is of insufficient size for the requested data")] + InsufficientBufferSize = 52, + #[error("denied by security policy")] + Denied = 53, + #[error("the KC on which the function is operating is missing")] + MissingKC = 54, + #[error("the KC on which the function is operating is invalid")] + InvalidKC = 55, + #[error("a search or query operation did not return a result")] + NotFound = 56, +} + +impl From<mach2::kern_return::kern_return_t> for KernelError { + fn from(kr: mach2::kern_return::kern_return_t) -> Self { + use mach2::kern_return::*; + + match kr { + KERN_INVALID_ADDRESS => Self::InvalidAddress, + KERN_PROTECTION_FAILURE => Self::ProtectionFailure, + KERN_NO_SPACE => Self::NoSpace, + KERN_INVALID_ARGUMENT => Self::InvalidArgument, + KERN_FAILURE => Self::Failure, + KERN_RESOURCE_SHORTAGE => Self::ResourceShortage, + KERN_NOT_RECEIVER => Self::NotReceiver, + KERN_NO_ACCESS => Self::NoAccess, + KERN_MEMORY_FAILURE => Self::MemoryFailure, + KERN_MEMORY_ERROR => Self::MemoryError, + KERN_ALREADY_IN_SET => Self::AlreadyInSet, + 
KERN_NOT_IN_SET => Self::NotInSet, + KERN_NAME_EXISTS => Self::NameExists, + KERN_ABORTED => Self::Aborted, + KERN_INVALID_NAME => Self::InvalidName, + KERN_INVALID_TASK => Self::InvalidTask, + KERN_INVALID_RIGHT => Self::InvalidRight, + KERN_INVALID_VALUE => Self::InvalidValue, + KERN_UREFS_OVERFLOW => Self::UserRefsOverflow, + KERN_INVALID_CAPABILITY => Self::InvalidCapability, + KERN_RIGHT_EXISTS => Self::RightExists, + KERN_INVALID_HOST => Self::InvalidHost, + KERN_MEMORY_PRESENT => Self::MemoryPresent, + KERN_INVALID_PROCESSOR_SET => Self::InvalidProcessorSet, + KERN_POLICY_LIMIT => Self::PolicyLimit, + KERN_INVALID_POLICY => Self::InvalidPolicy, + KERN_INVALID_OBJECT => Self::InvalidObject, + KERN_ALREADY_WAITING => Self::AlreadyWaiting, + KERN_DEFAULT_SET => Self::DefaultSet, + KERN_EXCEPTION_PROTECTED => Self::ExceptionProtected, + KERN_INVALID_LEDGER => Self::InvalidLedger, + KERN_INVALID_MEMORY_CONTROL => Self::InvalidMemoryControl, + KERN_INVALID_SECURITY => Self::InvalidSecurity, + KERN_NOT_DEPRESSED => Self::NotDepressed, + KERN_TERMINATED => Self::Terminated, + KERN_LOCK_SET_DESTROYED => Self::LockSetDestroyed, + KERN_LOCK_UNSTABLE => Self::LockUnstable, + KERN_LOCK_OWNED => Self::LockOwned, + KERN_LOCK_OWNED_SELF => Self::LockOwnedSelf, + KERN_SEMAPHORE_DESTROYED => Self::SemaphoreDestroyed, + KERN_RPC_SERVER_TERMINATED => Self::RpcServerTerminated, + KERN_RPC_TERMINATE_ORPHAN => Self::RpcTerminateOrphan, + KERN_RPC_CONTINUE_ORPHAN => Self::RpcContinueOrphan, + KERN_NOT_SUPPORTED => Self::NotSupported, + KERN_NODE_DOWN => Self::NodeDown, + KERN_NOT_WAITING => Self::NotWaiting, + KERN_OPERATION_TIMED_OUT => Self::OperationTimedOut, + KERN_CODESIGN_ERROR => Self::CodesignError, + KERN_POLICY_STATIC => Self::PoicyStatic, + // 52..=56 are matched by value rather than by a `KERN_*` name + 52 => Self::InsufficientBufferSize, + 53 => Self::Denied, + 54 => Self::MissingKC, + 55 => Self::InvalidKC, + 56 => Self::NotFound, + // This should never happen given a result from a mach call, but + // in that case we just use `Failure` as the mach header itself + // describes it as a catch all + _ => 
Self::Failure, + } + } +} + +// From /usr/include/mach/machine/thread_state.h +pub const THREAD_STATE_MAX: usize = 1296; + +cfg_if::cfg_if! { + if #[cfg(target_arch = "x86_64")] { + /// x86_THREAD_STATE64 in /usr/include/mach/i386/thread_status.h + pub const THREAD_STATE_FLAVOR: u32 = 4; + + pub type ArchThreadState = mach2::structs::x86_thread_state64_t; + } else if #[cfg(target_arch = "aarch64")] { + /// ARM_THREAD_STATE64 in /usr/include/mach/arm/thread_status.h + pub const THREAD_STATE_FLAVOR: u32 = 6; + + // Missing from mach2 atm + // _STRUCT_ARM_THREAD_STATE64 from /usr/include/mach/arm/_structs.h + #[repr(C)] + pub struct Arm64ThreadState { + pub x: [u64; 29], + pub fp: u64, + pub lr: u64, + pub sp: u64, + pub pc: u64, + pub cpsr: u32, + __pad: u32, + } + + pub type ArchThreadState = Arm64ThreadState; + } else { + compile_error!("unsupported target arch"); + } +} + +#[repr(C, align(8))] +pub struct ThreadState { + pub state: [u32; THREAD_STATE_MAX], + pub state_size: u32, +} + +impl Default for ThreadState { + fn default() -> Self { + Self { + state: [0u32; THREAD_STATE_MAX], + state_size: (THREAD_STATE_MAX * std::mem::size_of::<u32>()) as u32, + } + } +} + +impl ThreadState { + /// Gets the program counter + #[inline] + pub fn pc(&self) -> u64 { + cfg_if::cfg_if! { + if #[cfg(target_arch = "x86_64")] { + self.arch_state().__rip + } else if #[cfg(target_arch = "aarch64")] { + self.arch_state().pc + } + } + } + + /// Gets the stack pointer + #[inline] + pub fn sp(&self) -> u64 { + cfg_if::cfg_if! 
{ + if #[cfg(target_arch = "x86_64")] { + self.arch_state().__rsp + } else if #[cfg(target_arch = "aarch64")] { + self.arch_state().sp + } + } + } + + /// Converts the raw binary blob into the architecture specific state + #[inline] + pub fn arch_state(&self) -> &ArchThreadState { + // SAFETY: hoping the kernel isn't lying + unsafe { &*(self.state.as_ptr().cast()) } + } +} + +/// Minimal trait that just pairs a structure that can be filled out by +/// [`mach2::task::task_info`] with the "flavor" that tells it the info we +/// actually want to retrieve +pub trait TaskInfo { + /// One of the `MACH_*_TASK` integers. I assume it's very bad if you implement + /// this trait and provide the wrong flavor for the struct + const FLAVOR: u32; +} + +/// Minimal trait that just pairs a structure that can be filled out by +/// [`thread_info`] with the "flavor" that tells it the info we +/// actually want to retrieve +pub trait ThreadInfo { + /// One of the `THREAD_*` integers. I assume it's very bad if you implement + /// this trait and provide the wrong flavor for the struct + const FLAVOR: u32; +} + +/// <usr/include/mach-o/loader.h>, the file type for the main executable image +pub const MH_EXECUTE: u32 = 0x2; +/// <usr/include/mach-o/loader.h>, the file type dyld, the dynamic loader +pub const MH_DYLINKER: u32 = 0x7; +// usr/include/mach-o/loader.h, magic number for MachHeader +pub const MH_MAGIC_64: u32 = 0xfeedfacf; + +/// Load command constants from usr/include/mach-o/loader.h +#[repr(u32)] +#[derive(Debug)] +pub enum LoadCommandKind { + /// Command to map a segment + Segment = 0x19, + /// Dynamically linked shared lib ident + IdDylib = 0xd, + /// Image uuid + Uuid = 0x1b, + /// Load a dynamic linker. 
Should only be on MH_EXECUTE (main executable) + /// images when the dynamic linker is overriden + LoadDylinker = 0xe, + /// Dynamic linker identification + IdDylinker = 0xf, +} + +impl LoadCommandKind { + #[inline] + fn from_u32(kind: u32) -> Option<Self> { + Some(if kind == Self::Segment as u32 { + Self::Segment + } else if kind == Self::IdDylib as u32 { + Self::IdDylib + } else if kind == Self::Uuid as u32 { + Self::Uuid + } else if kind == Self::LoadDylinker as u32 { + Self::LoadDylinker + } else if kind == Self::IdDylinker as u32 { + Self::IdDylinker + } else { + return None; + }) + } +} + +/// The header at the beginning of every (valid) Mach image +/// +/// <usr/include/mach-o/loader.h> +#[repr(C)] +#[derive(Clone)] +pub struct MachHeader { + /// Mach magic number identifier, this is used to validate the header is valid + pub magic: u32, + /// `cpu_type_t` cpu specifier + pub cpu_type: i32, + /// `cpu_subtype_t` machine specifier + pub cpu_sub_type: i32, + /// Type of file, eg. [`MH_EXECUTE`] for the main executable + pub file_type: u32, + /// Number of load commands for the image + pub num_commands: u32, + /// Size in bytes of all of the load commands + pub size_commands: u32, + pub flags: u32, + __reserved: u32, +} + +/// Every load command is a variable sized struct depending on its type, but +/// they all include the fields in this struct at the beginning +/// +/// <usr/include/mach-o/loader.h> +#[repr(C)] +pub struct LoadCommandBase { + /// Type of load command `LC_*` + pub cmd: u32, + /// Total size of the command in bytes + pub cmd_size: u32, +} + +/// The 64-bit segment load command indicates that a part of this file is to be +/// mapped into a 64-bit task's address space. If the 64-bit segment has +/// sections then section_64 structures directly follow the 64-bit segment +/// command and their size is reflected in `cmdsize`. 
+#[repr(C)] +pub struct SegmentCommand64 { + cmd: u32, + pub cmd_size: u32, + /// String name of the section + pub segment_name: [u8; 16], + /// Memory address the segment is mapped to + pub vm_addr: u64, + /// Total size of the segment + pub vm_size: u64, + /// File offset of the segment + pub file_off: u64, + /// Amount mapped from the file + pub file_size: u64, + /// Maximum VM protection + pub max_prot: i32, + /// Initial VM protection + pub init_prot: i32, + /// Number of sections in the segment + pub num_sections: u32, + pub flags: u32, +} + +/// Dynamically linked shared libraries are identified by two things. The +/// pathname (the name of the library as found for execution), and the +/// compatibility version number. The pathname must match and the compatibility +/// number in the user of the library must be greater than or equal to the +/// library being used. The time stamp is used to record the time a library was +/// built and copied into user so it can be use to determined if the library used +/// at runtime is exactly the same as used to built the program. +#[repr(C)] +#[derive(Debug)] +pub struct Dylib { + /// Offset from the load command start to the pathname + pub name: u32, + /// Library's build time stamp + pub timestamp: u32, + /// Library's current version number + pub current_version: u32, + /// Library's compatibility version number + pub compatibility_version: u32, +} + +/// A dynamically linked shared library (filetype == MH_DYLIB in the mach header) +/// contains a dylib_command (cmd == LC_ID_DYLIB) to identify the library. +/// An object that uses a dynamically linked shared library also contains a +/// dylib_command (cmd == LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB, or +/// LC_REEXPORT_DYLIB) for each library it uses. 
+#[repr(C)] +pub struct DylibCommand { + cmd: u32, + /// Total size of the command in bytes, including pathname string + pub cmd_size: u32, + /// Library identification + pub dylib: Dylib, +} + +/// A program that uses a dynamic linker contains a dylinker_command to identify +/// the name of the dynamic linker (LC_LOAD_DYLINKER). And a dynamic linker +/// contains a dylinker_command to identify the dynamic linker (LC_ID_DYLINKER). +/// A file can have at most one of these. +/// This struct is also used for the LC_DYLD_ENVIRONMENT load command and +/// contains string for dyld to treat like environment variable. +#[repr(C)] +struct DylinkerCommandRepr { + /// LC_ID_DYLINKER, LC_LOAD_DYLINKER or LC_DYLD_ENVIRONMENT + cmd: u32, + /// includes pathname string + cmd_size: u32, + /// Dynamic linker's path name, an offset from the load command address + name: u32, +} + +pub struct DylinkerCommand<'buf> { + /// LC_ID_DYLINKER, LC_LOAD_DYLINKER or LC_DYLD_ENVIRONMENT + pub cmd: u32, + /// includes pathname string + pub cmd_size: u32, + /// The offset from the load command where the path was read + pub name_offset: u32, + /// Dynamic linker's path name + pub name: &'buf str, +} + +/// The uuid load command contains a single 128-bit unique random number that +/// identifies an object produced by the static link editor. +#[repr(C)] +pub struct UuidCommand { + cmd: u32, + pub cmd_size: u32, + /// The UUID. 
The components are in big-endian regardless of the host architecture + pub uuid: [u8; 16], +} + +/// A block of load commands for a particular image +pub struct LoadCommands { + /// The block of memory containing all of the load commands + pub buffer: Vec<u8>, + /// The number of actual load commands that _should_ be in the buffer + pub count: u32, +} + +impl LoadCommands { + /// Retrieves an iterator over the load commands in the contained buffer + #[inline] + pub fn iter(&self) -> LoadCommandsIter<'_> { + LoadCommandsIter { + buffer: &self.buffer, + count: self.count, + } + } +} + +/// A single load command +pub enum LoadCommand<'buf> { + Segment(&'buf SegmentCommand64), + Dylib(&'buf DylibCommand), + Uuid(&'buf UuidCommand), + DylinkerCommand(DylinkerCommand<'buf>), +} + +pub struct LoadCommandsIter<'buf> { + buffer: &'buf [u8], + count: u32, +} + +impl<'buf> Iterator for LoadCommandsIter<'buf> { + type Item = LoadCommand<'buf>; + + /// Yields the next load command of a kind we understand, skipping unknown + /// or malformed commands. Iteration ends once `count` commands have been + /// consumed or the buffer can no longer hold a complete command. + fn next(&mut self) -> Option<Self::Item> { + // SAFETY: we're interpreting raw bytes as C structs, we try and be safe + unsafe { + loop { + if self.count == 0 || self.buffer.len() < std::mem::size_of::<LoadCommandBase>() { + return None; + } + + let header = &*(self.buffer.as_ptr().cast::<LoadCommandBase>()); + let cmd_size = header.cmd_size as usize; + + // This would mean we've been lied to by the MachHeader and either + // the size_commands field was too small, or the num_command was + // too large. A command smaller than its own header (including a + // size of zero) is just as bogus, and would otherwise leave the + // cursor stuck on the same command + if cmd_size < std::mem::size_of::<LoadCommandBase>() || cmd_size > self.buffer.len() { + return None; + } + + let cmd = LoadCommandKind::from_u32(header.cmd).and_then(|kind| { + // Only reinterpret the bytes as a concrete command if the + // command is actually large enough to hold that struct + let min_size = match kind { + LoadCommandKind::Segment => std::mem::size_of::<SegmentCommand64>(), + LoadCommandKind::IdDylib => std::mem::size_of::<DylibCommand>(), + LoadCommandKind::Uuid => std::mem::size_of::<UuidCommand>(), + LoadCommandKind::LoadDylinker | LoadCommandKind::IdDylinker => { + std::mem::size_of::<DylinkerCommandRepr>() + } + }; + + if cmd_size < min_size { + return None; + } + + Some(match kind { + LoadCommandKind::Segment => LoadCommand::Segment( + &*(self.buffer.as_ptr().cast::<SegmentCommand64>()), + ), + LoadCommandKind::IdDylib => { + LoadCommand::Dylib(&*(self.buffer.as_ptr().cast::<DylibCommand>())) + } + LoadCommandKind::Uuid => { + LoadCommand::Uuid(&*(self.buffer.as_ptr().cast::<UuidCommand>())) + } + LoadCommandKind::LoadDylinker | LoadCommandKind::IdDylinker => { + let dcr = 
&*(self.buffer.as_ptr().cast::<DylinkerCommandRepr>()); + + // Checked slicing, a name offset past the end of the + // command means it is malformed and gets skipped + // rather than panicking + let nul = self + .buffer + .get(dcr.name as usize..cmd_size)? + .iter() + .position(|c| *c == 0)?; + + LoadCommand::DylinkerCommand(DylinkerCommand { + cmd: dcr.cmd, + cmd_size: dcr.cmd_size, + name_offset: dcr.name, + name: std::str::from_utf8( + &self.buffer[dcr.name as usize..dcr.name as usize + nul], + ) + .ok()?, + }) + } + }) + }); + + self.count -= 1; + self.buffer = &self.buffer[header.cmd_size as usize..]; + + if let Some(cmd) = cmd { + return Some(cmd); + } + } + } + } + + fn size_hint(&self) -> (usize, Option<usize>) { + // Unknown and malformed commands are skipped, so `count` is only an + // upper bound on the number of items that will be yielded + (0, Some(self.count as usize)) + } +} + +/// Retrieves an integer sysctl by name. Returns the default value if retrieval +/// fails. +pub fn sysctl_by_name<T: Sized + Default>(name: &[u8]) -> T { + let mut out = T::default(); + let mut len = std::mem::size_of_val(&out); + + // SAFETY: syscall + unsafe { + if libc::sysctlbyname( + name.as_ptr().cast(), + (&mut out as *mut T).cast(), + &mut len, + std::ptr::null_mut(), + 0, + ) != 0 + { + // log? + T::default() + } else { + out + } + } +} + +/// Retrieves an `i32` sysctl by name and casts it to the specified integer type. +/// Returns the default value if retrieval fails or the value is out of bounds of +/// the specified integer type. +pub fn int_sysctl_by_name<T: TryFrom<i32> + Default>(name: &[u8]) -> T { + let val = sysctl_by_name::<i32>(name); + T::try_from(val).unwrap_or_default() +} + +/// Retrieves a string sysctl by name. Returns an empty string if the retrieval +/// fails or the string can't be converted to utf-8. 
+pub fn sysctl_string(name: &[u8]) -> String { + let mut buf_len = 0; + + // SAFETY: syscalls + let string_buf = unsafe { + // Retrieve the size of the string (including null terminator) + if libc::sysctlbyname( + name.as_ptr().cast(), + std::ptr::null_mut(), + &mut buf_len, + std::ptr::null_mut(), + 0, + ) != 0 + || buf_len <= 1 + { + return String::new(); + } + + let mut buff = Vec::new(); + buff.resize(buf_len, 0); + + if libc::sysctlbyname( + name.as_ptr().cast(), + buff.as_mut_ptr().cast(), + &mut buf_len, + std::ptr::null_mut(), + 0, + ) != 0 + { + return String::new(); + } + + // The second call reports how many bytes were actually written, which + // can be fewer than the size estimated by the first call + buff.truncate(buf_len); + + buff.pop(); // remove null terminator + buff + }; + + String::from_utf8(string_buf).unwrap_or_default() +} + +extern "C" { + /// From <usr/include/mach/mach_traps.h>, this retrieves the normal PID for + /// the specified task as the syscalls from BSD use PIDs, not mach ports. + /// + /// This seems to be marked as "obsolete" in the header, but of course being + /// Apple, there is no mention of a replacement function or when/if it might + /// eventually disappear. + pub fn pid_for_task(task: mach_port_name_t, pid: *mut i32) -> kern_return_t; + + /// From <usr/include/mach/thread_act.h>, this retrieves thread info for the + /// specified thread. 
+ /// + /// Note that the info_size parameter is actually the size of the thread_info / 4 + /// as it is the number of words in the thread info + pub fn thread_info( + thread: u32, + flavor: u32, + thread_info: *mut i32, + info_size: *mut u32, + ) -> kern_return_t; +} diff --git a/third_party/rust/minidump-writer/src/mac/minidump_writer.rs b/third_party/rust/minidump-writer/src/mac/minidump_writer.rs new file mode 100644 index 0000000000..b05662fd21 --- /dev/null +++ b/third_party/rust/minidump-writer/src/mac/minidump_writer.rs @@ -0,0 +1,187 @@ +use crate::{ + dir_section::{DirSection, DumpBuf}, + mac::{errors::WriterError, task_dumper::TaskDumper}, + mem_writer::*, + minidump_format::{self, MDMemoryDescriptor, MDRawDirectory, MDRawHeader}, +}; +use std::io::{Seek, Write}; + +pub use mach2::mach_types::{task_t, thread_t}; + +type Result<T> = std::result::Result<T, WriterError>; + +pub struct MinidumpWriter { + /// The crash context as captured by an exception handler + pub(crate) crash_context: Option<crash_context::CrashContext>, + /// List of raw blocks of memory we've written into the stream. These are + /// referenced by other streams (eg thread list) + pub(crate) memory_blocks: Vec<MDMemoryDescriptor>, + /// The task being dumped + pub(crate) task: task_t, + /// The handler thread, so it can be ignored/deprioritized + pub(crate) handler_thread: thread_t, +} + +impl MinidumpWriter { + /// Creates a minidump writer for the specified mach task (process) and + /// handler thread. If not specified, defaults to the current task and thread. 
+ /// + /// ``` + /// use minidump_writer::{minidump_writer::MinidumpWriter, mach2}; + /// + /// // Note that this is the same as specifying `None` for both the task and + /// // handler thread, this is just meant to illustrate how you can setup + /// // a MinidumpWriter manually instead of using a `CrashContext` + /// // SAFETY: syscalls + /// let mdw = unsafe { + /// MinidumpWriter::new( + /// Some(mach2::traps::mach_task_self()), + /// Some(mach2::mach_init::mach_thread_self()), + /// ) + /// }; + /// ``` + pub fn new(task: Option<task_t>, handler_thread: Option<thread_t>) -> Self { + Self { + crash_context: None, + memory_blocks: Vec::new(), + task: task.unwrap_or_else(|| { + // SAFETY: syscall + unsafe { mach2::traps::mach_task_self() } + }), + handler_thread: handler_thread.unwrap_or_else(|| { + // SAFETY: syscall + unsafe { mach2::mach_init::mach_thread_self() } + }), + } + } + + /// Creates a minidump writer with the specified crash context, presumably + /// for another task + pub fn with_crash_context(crash_context: crash_context::CrashContext) -> Self { + let task = crash_context.task; + let handler_thread = crash_context.handler_thread; + + Self { + crash_context: Some(crash_context), + memory_blocks: Vec::new(), + task, + handler_thread, + } + } + + /// Writes a minidump to the specified destination, returning the raw minidump + /// contents upon success + pub fn dump(&mut self, destination: &mut (impl Write + Seek)) -> Result<Vec<u8>> { + let writers = { + #[allow(clippy::type_complexity)] + let mut writers: Vec< + Box<dyn FnMut(&mut Self, &mut DumpBuf, &TaskDumper) -> Result<MDRawDirectory>>, + > = vec![ + Box::new(|mw, buffer, dumper| mw.write_thread_list(buffer, dumper)), + Box::new(|mw, buffer, dumper| mw.write_memory_list(buffer, dumper)), + Box::new(|mw, buffer, dumper| mw.write_system_info(buffer, dumper)), + Box::new(|mw, buffer, dumper| mw.write_module_list(buffer, dumper)), + Box::new(|mw, buffer, dumper| mw.write_misc_info(buffer, dumper)), 
+ Box::new(|mw, buffer, dumper| mw.write_breakpad_info(buffer, dumper)), + Box::new(|mw, buffer, dumper| mw.write_thread_names(buffer, dumper)), + ]; + + // Exception stream needs to be the last entry in this array as it may + // be omitted in the case where the minidump is written without an + // exception. + if self + .crash_context + .as_ref() + .and_then(|cc| cc.exception.as_ref()) + .is_some() + { + writers.push(Box::new(|mw, buffer, dumper| { + mw.write_exception(buffer, dumper) + })); + } + + writers + }; + + let num_writers = writers.len() as u32; + let mut buffer = Buffer::with_capacity(0); + + let mut header_section = MemoryWriter::<MDRawHeader>::alloc(&mut buffer)?; + let mut dir_section = DirSection::new(&mut buffer, num_writers, destination)?; + + let header = MDRawHeader { + signature: minidump_format::MD_HEADER_SIGNATURE, + version: minidump_format::MD_HEADER_VERSION, + stream_count: num_writers, + stream_directory_rva: dir_section.position(), + checksum: 0, /* Can be 0. In fact, that's all that's + * been found in minidump files. */ + time_date_stamp: std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs() as u32, // TODO: This is not Y2038 safe, but thats how its currently defined as + flags: 0, + }; + header_section.set_value(&mut buffer, header)?; + + // Ensure the header gets flushed. 
If we crash somewhere below, + // we should have a mostly-intact dump + dir_section.write_to_file(&mut buffer, None)?; + + let dumper = super::task_dumper::TaskDumper::new(self.task); + + for mut writer in writers { + let dirent = writer(self, &mut buffer, &dumper)?; + dir_section.write_to_file(&mut buffer, Some(dirent))?; + } + + Ok(buffer.into()) + } + + /// Retrieves the list of active threads in the target process, except + /// the handler thread if it is known, to simplify dump analysis + #[inline] + pub(crate) fn threads(&self, dumper: &TaskDumper) -> ActiveThreads { + ActiveThreads { + threads: dumper.read_threads().unwrap_or_default(), + handler_thread: self.handler_thread, + i: 0, + } + } +} + +pub(crate) struct ActiveThreads { + threads: &'static [u32], + handler_thread: u32, + i: usize, +} + +impl ActiveThreads { + /// The number of threads the iterator yields when run from the start, ie. + /// every thread in the list that is not the handler thread + #[inline] + pub(crate) fn len(&self) -> usize { + // Count exactly what `next` yields. Unconditionally subtracting one + // for a known handler thread underflows when the thread list is + // empty (eg. the thread list could not be read) and undercounts when + // the handler thread is not in the list at all + self.threads + .iter() + .filter(|&&tid| tid != self.handler_thread) + .count() + } +} + +impl Iterator for ActiveThreads { + type Item = u32; + + fn next(&mut self) -> Option<Self::Item> { + while self.i < self.threads.len() { + let i = self.i; + self.i += 1; + + if self.threads[i] != self.handler_thread { + return Some(self.threads[i]); + } + } + + None + } +} diff --git a/third_party/rust/minidump-writer/src/mac/streams.rs b/third_party/rust/minidump-writer/src/mac/streams.rs new file mode 100644 index 0000000000..bec3b22597 --- /dev/null +++ b/third_party/rust/minidump-writer/src/mac/streams.rs @@ -0,0 +1,16 @@ +mod breakpad_info; +mod exception; +mod memory_list; +mod misc_info; +mod module_list; +mod system_info; +mod thread_list; +mod thread_names; + +use super::{ + errors::WriterError, + mach, + minidump_writer::MinidumpWriter, + task_dumper::{self, ImageInfo, TaskDumpError, TaskDumper}, +}; +use crate::{dir_section::DumpBuf, mem_writer::*, minidump_format::*}; diff --git a/third_party/rust/minidump-writer/src/mac/streams/breakpad_info.rs 
b/third_party/rust/minidump-writer/src/mac/streams/breakpad_info.rs new file mode 100644 index 0000000000..5196a95cac --- /dev/null +++ b/third_party/rust/minidump-writer/src/mac/streams/breakpad_info.rs @@ -0,0 +1,34 @@ +use super::*; +use format::{BreakpadInfoValid, MINIDUMP_BREAKPAD_INFO as BreakpadInfo}; + +impl MinidumpWriter { + /// Writes the [`BreakpadInfo`] stream. + /// + /// For MacOS the primary use of this stream is to differentiate between + /// the thread that actually raised an exception, and the thread on which + /// the exception port was listening, so that the exception port (handler) + /// thread can be deprioritized/ignored when analyzing the minidump. + pub(crate) fn write_breakpad_info( + &mut self, + buffer: &mut DumpBuf, + _dumper: &TaskDumper, + ) -> Result<MDRawDirectory, WriterError> { + let bp_section = MemoryWriter::<BreakpadInfo>::alloc_with_val( + buffer, + BreakpadInfo { + validity: BreakpadInfoValid::DumpThreadId.bits() + | BreakpadInfoValid::RequestingThreadId.bits(), + // The thread where the exception port handled the exception, might + // be useful to ignore/deprioritize when processing the minidump + dump_thread_id: self.handler_thread, + // The actual thread where the exception was thrown + requesting_thread_id: self.crash_context.as_ref().map(|cc| cc.thread).unwrap_or(0), + }, + )?; + + Ok(MDRawDirectory { + stream_type: MDStreamType::BreakpadInfoStream as u32, + location: bp_section.location(), + }) + } +} diff --git a/third_party/rust/minidump-writer/src/mac/streams/exception.rs b/third_party/rust/minidump-writer/src/mac/streams/exception.rs new file mode 100644 index 0000000000..e594dd8d95 --- /dev/null +++ b/third_party/rust/minidump-writer/src/mac/streams/exception.rs @@ -0,0 +1,176 @@ +use super::*; + +use mach2::exception_types as et; + +impl MinidumpWriter { + /// Writes the [`minidump_common::format::MINIDUMP_EXCEPTION_STREAM`] stream. 
+ /// + /// This stream is optional on MacOS as a user requested minidump could + /// choose not to specify the exception information. + pub(crate) fn write_exception( + &mut self, + buffer: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result<MDRawDirectory, WriterError> { + // This shouldn't fail since we won't be writing this stream if the crash context is + // not present + let crash_context = self + .crash_context + .as_ref() + .ok_or(WriterError::NoCrashContext)?; + + let thread_state = dumper.read_thread_state(crash_context.thread).ok(); + + let thread_context = if let Some(ts) = &thread_state { + let mut cpu = Default::default(); + Self::fill_cpu_context(ts, &mut cpu); + MemoryWriter::alloc_with_val(buffer, cpu) + .map(|mw| mw.location()) + .ok() + } else { + None + }; + + let exception_record = crash_context + .exception + .as_ref() + .map(|exc| { + let code = exc.code as u64; + + // `EXC_CRASH` exceptions wrap other exceptions, so we want to + // retrieve the _actual_ exception + let wrapped_exc = if exc.kind as u32 == et::EXC_CRASH { + recover_exc_crash_wrapped_exception(code) + } else { + None + }; + + // For EXC_RESOURCE and EXC_GUARD crashes Crashpad records the + // uppermost 32 bits of the exception code in the exception flags, + // as they are the most interesting for those exceptions. 
Neither + // of these exceptions can be wrapped by an `EXC_CRASH` + // + // EXC_GUARD + // code: + // +-------------------+----------------+--------------+ + // |[63:61] guard type | [60:32] flavor | [31:0] target| + // +-------------------+----------------+--------------+ + // + // EXC_RESOURCE + // code: + // +--------------------------------------------------------+ + // |[63:61] resource type | [60:58] flavor | [57:32] unused | + // +--------------------------------------------------------+ + let exception_code = + if exc.kind as u32 == et::EXC_RESOURCE || exc.kind as u32 == et::EXC_GUARD { + (code >> 32) as u32 + } else if let Some(wrapped) = wrapped_exc { + wrapped.code + } else { + // For all other exceptions types, the value in the code + // _should_ never exceed 32 bits, crashpad does an actual + // range check here, but since we don't really log anything + // else at the moment I'll punt that for now + // TODO: log/do something if exc.code > u32::MAX + code as u32 + }; + + let exception_kind = if let Some(wrapped) = wrapped_exc { + wrapped.kind + } else { + exc.kind + }; + + let exception_address = + if exception_kind == et::EXC_BAD_ACCESS && exc.subcode.is_some() { + exc.subcode.unwrap_or_default() + } else if let Some(ts) = thread_state { + ts.pc() + } else { + 0 + }; + + // The naming is confusing here, but it is how it is + let mut md_exc = MDException { + exception_code: exception_kind, + exception_flags: exception_code, + exception_address, + ..Default::default() + }; + + // Now append the (mostly) original information to the "ancillary" + // exception_information at the end. 
This allows a minidump parser + // to recover the full exception information for the crash, rather + // than only using the (potentially) truncated information we + // just set in `exception_code` and `exception_flags` + md_exc.exception_information[0] = exception_kind as u64; + md_exc.exception_information[1] = code; + + md_exc.number_parameters = if let Some(subcode) = exc.subcode { + md_exc.exception_information[2] = subcode; + 3 + } else { + 2 + }; + + md_exc + }) + .unwrap_or_default(); + + let stream = MDRawExceptionStream { + thread_id: crash_context.thread, + exception_record, + thread_context: thread_context.unwrap_or_default(), + __align: 0, + }; + + let exc_section = MemoryWriter::<MDRawExceptionStream>::alloc_with_val(buffer, stream)?; + + Ok(MDRawDirectory { + stream_type: MDStreamType::ExceptionStream as u32, + location: exc_section.location(), + }) + } +} + +/// [`et::EXC_CRASH`] is a wrapper exception around another exception, but not +/// all exceptions can be wrapped by it, so this function validates that the +/// `EXC_CRASH` is actually valid +#[inline] +fn is_valid_exc_crash(exc_code: u64) -> bool { + let wrapped = ((exc_code >> 20) & 0xf) as u32; + + !( + wrapped == et::EXC_CRASH // EXC_CRASH can't wrap another one + || wrapped == et::EXC_RESOURCE // EXC_RESOURCE would lose information + || wrapped == et::EXC_GUARD // EXC_GUARD would lose information + || wrapped == et::EXC_CORPSE_NOTIFY + // cannot be wrapped + ) +} + +/// The details for an exception wrapped by an `EXC_CRASH` +#[derive(Copy, Clone)] +struct WrappedException { + /// The `EXC_*` that was wrapped + kind: u32, + /// The code of the wrapped exception, for all exceptions other than + /// `EXC_RESOURCE` and `EXC_GUARD` this _should_ never exceed 32 bits, and + /// is one of the reasons that `EXC_CRASH` cannot wrap those 2 exceptions + code: u32, + /// The Unix signal number that the original exception was converted into + _signal: u8, +} + +/// Unwraps an `EXC_CRASH` exception code 
to the inner exception it wraps. +/// +/// Will return `None` if the specified code is wrapping an exception that +/// should not be possible to be wrapped in an `EXC_CRASH` +#[inline] +fn recover_exc_crash_wrapped_exception(code: u64) -> Option<WrappedException> { + is_valid_exc_crash(code).then(|| WrappedException { + kind: ((code >> 20) & 0xf) as u32, + code: (code & 0xfffff) as u32, + _signal: ((code >> 24) & 0xff) as u8, + }) +} diff --git a/third_party/rust/minidump-writer/src/mac/streams/memory_list.rs b/third_party/rust/minidump-writer/src/mac/streams/memory_list.rs new file mode 100644 index 0000000000..47a37fbfd6 --- /dev/null +++ b/third_party/rust/minidump-writer/src/mac/streams/memory_list.rs @@ -0,0 +1,72 @@ +use super::*; + +impl MinidumpWriter { + /// Writes the [`MDStreamType::MemoryListStream`]. The memory blocks that are + /// written into this stream are the raw thread contexts that were retrieved + /// and added by [`Self::write_thread_list`] + pub(crate) fn write_memory_list( + &mut self, + buffer: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result<MDRawDirectory, WriterError> { + // Include some memory around the instruction pointer if the crash was + // due to an exception + if let Some(cc) = &self.crash_context { + if cc.exception.is_some() { + const IP_MEM_SIZE: u64 = 256; + + let get_ip_block = |tid| -> Option<std::ops::Range<u64>> { + let thread_state = dumper.read_thread_state(tid).ok()?; + + let ip = thread_state.pc(); + + // Bound it to the upper and lower bounds of the region + // it's contained within. If it's not in a known memory region, + // don't bother trying to write it. + let region = dumper.get_vm_region(ip).ok()?; + + if ip < region.range.start || ip > region.range.end { + return None; + } + + // Try to get IP_MEM_SIZE / 2 bytes before and after the IP, but + // settle for whatever's available. 
+ let start = std::cmp::max(region.range.start, ip - IP_MEM_SIZE / 2); + let end = std::cmp::min(ip + IP_MEM_SIZE / 2, region.range.end); + + Some(start..end) + }; + + if let Some(ip_range) = get_ip_block(cc.thread) { + let size = ip_range.end - ip_range.start; + let stack_buffer = + dumper.read_task_memory(ip_range.start as _, size as usize)?; + let ip_location = MDLocationDescriptor { + data_size: size as u32, + rva: buffer.position() as u32, + }; + buffer.write_all(&stack_buffer); + + self.memory_blocks.push(MDMemoryDescriptor { + start_of_memory_range: ip_range.start, + memory: ip_location, + }); + } + } + } + + let list_header = + MemoryWriter::<u32>::alloc_with_val(buffer, self.memory_blocks.len() as u32)?; + + let mut dirent = MDRawDirectory { + stream_type: MDStreamType::MemoryListStream as u32, + location: list_header.location(), + }; + + let block_list = + MemoryArrayWriter::<MDMemoryDescriptor>::alloc_from_array(buffer, &self.memory_blocks)?; + + dirent.location.data_size += block_list.location().data_size; + Ok(dirent) + } +} diff --git a/third_party/rust/minidump-writer/src/mac/streams/misc_info.rs b/third_party/rust/minidump-writer/src/mac/streams/misc_info.rs new file mode 100644 index 0000000000..629b94cee6 --- /dev/null +++ b/third_party/rust/minidump-writer/src/mac/streams/misc_info.rs @@ -0,0 +1,179 @@ +use super::*; +use format::{MiscInfoFlags, MINIDUMP_MISC_INFO_2 as MDRawMiscInfo}; +use std::time::Duration; + +/// From <usr/include/mach/time_value.h> +#[repr(C)] +#[derive(Copy, Clone)] +struct TimeValue { + seconds: i32, + microseconds: i32, +} + +impl From<TimeValue> for Duration { + fn from(tv: TimeValue) -> Self { + let mut seconds = tv.seconds as u64; + let mut microseconds = tv.microseconds as u32; + // This _probably_ will never happen, but this will avoid a panic in + // Duration::new() if it does + if tv.microseconds >= 1000000 { + seconds += 1; + microseconds -= 1000000; + } + + Duration::new(seconds, microseconds * 1000) + } +} + 
+/// From <usr/include/mach/task_info.h>, this includes basic information about +/// a task. +#[repr(C, packed(4))] +struct MachTaskBasicInfo { + /// Virtual memory size in bytes + virtual_size: u64, + /// Resident memory size in bytes + resident_size: u64, + /// Maximum resident memory size in bytes + resident_size_max: u64, + /// Total user run time for terminated threads + user_time: TimeValue, + /// Total system run time for terminated threads + system_time: TimeValue, + /// Default policy for new threads + policy: i32, + /// Suspend count for task + suspend_count: i32, +} + +impl mach::TaskInfo for MachTaskBasicInfo { + const FLAVOR: u32 = mach::task_info::MACH_TASK_BASIC_INFO; +} + +/// From <usr/include/mach/task_info.h>, this includes times for currently +/// live threads in the task. +#[repr(C, packed(4))] +struct TaskThreadsTimeInfo { + /// Total user run time for live threads + user_time: TimeValue, + /// total system run time for live threads + system_time: TimeValue, +} + +impl mach::TaskInfo for TaskThreadsTimeInfo { + const FLAVOR: u32 = mach::task_info::TASK_THREAD_TIMES_INFO; +} + +impl MinidumpWriter { + /// Writes the [`MDStreamType::MiscInfoStream`] stream. + /// + /// On MacOS, we write a [`minidump_common::format::MINIDUMP_MISC_INFO_2`] + /// to this stream, which includes the start time of the process at second + /// granularity, and the (approximate) amount of time spent in user and + /// system (kernel) time for the lifetime of the task. We attempt to also + /// retrieve power ie CPU usage statistics, though this information is only + /// currently available on x86_64, not aarch64 at the moment. 
+ pub(crate) fn write_misc_info( + &mut self, + buffer: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result<MDRawDirectory, WriterError> { + let mut info_section = MemoryWriter::<MDRawMiscInfo>::alloc(buffer)?; + let dirent = MDRawDirectory { + stream_type: MDStreamType::MiscInfoStream as u32, + location: info_section.location(), + }; + + let pid = dumper.pid_for_task()?; + + let mut misc_info = MDRawMiscInfo { + size_of_info: std::mem::size_of::<MDRawMiscInfo>() as u32, + flags1: MiscInfoFlags::MINIDUMP_MISC1_PROCESS_ID.bits() + | MiscInfoFlags::MINIDUMP_MISC1_PROCESS_TIMES.bits() + | MiscInfoFlags::MINIDUMP_MISC1_PROCESSOR_POWER_INFO.bits(), + process_id: pid as u32, + process_create_time: 0, + process_user_time: 0, + process_kernel_time: 0, + processor_max_mhz: 0, + processor_current_mhz: 0, + processor_mhz_limit: 0, + processor_max_idle_state: 0, + processor_current_idle_state: 0, + }; + + // Note that both Breakpad and Crashpad use `sysctl CTL_KERN, KERN_PROC, KERN_PROC_PID` + // to retrieve the process start time, but none of the structures that + // are filled in by that call are in libc at the moment, and `proc_pidinfo` + // seems to work just fine, so using that instead. 
+ // + // SAFETY: `proc_info` is only read via `assume_init` after `proc_pidinfo` + // returns exactly `size`, the size of the buffer that was passed to it + misc_info.process_create_time = unsafe { + // Breakpad was using an old method to retrieve this, let's try the + // BSD method instead which is already implemented in libc + let mut proc_info = std::mem::MaybeUninit::<libc::proc_bsdinfo>::uninit(); + let size = std::mem::size_of::<libc::proc_bsdinfo>() as i32; + if libc::proc_pidinfo( + pid, + libc::PROC_PIDTBSDINFO, + 0, + proc_info.as_mut_ptr().cast(), + size, + ) == size + { + let proc_info = proc_info.assume_init(); + + proc_info.pbi_start_tvsec as u32 + } else { + 0 + } + }; + + // Note that Breakpad is using `getrusage` to retrieve this information, + // however that is wrong, as it can only retrieve the process usage information + // for the current or children processes, not an external process, so + // we use the Crashpad method, which is itself based off of the XNU + // method of retrieving the process times + // https://github.com/apple/darwin-xnu/blob/2ff845c2e033bd0ff64b5b6aa6063a1f8f65aa32/bsd/kern/kern_resource.c#L1215 + + // The basic task info keeps the timings for all of the terminated threads + let basic_info = dumper.task_info::<MachTaskBasicInfo>().ok(); + + // The thread times info keeps the timings for all of the living threads + let thread_times_info = dumper.task_info::<TaskThreadsTimeInfo>().ok(); + + let user_time = basic_info + .as_ref() + .map(|bi| Duration::from(bi.user_time)) + .unwrap_or_default() + + thread_times_info + .as_ref() + .map(|tt| Duration::from(tt.user_time)) + .unwrap_or_default(); + let system_time = basic_info + .as_ref() + .map(|bi| Duration::from(bi.system_time)) + .unwrap_or_default() + + thread_times_info + .as_ref() + .map(|tt| Duration::from(tt.system_time)) + .unwrap_or_default(); + + misc_info.process_user_time = user_time.as_secs() as u32; + misc_info.process_kernel_time = system_time.as_secs() as u32; + + // Note that neither of these two keys are present on aarch64, at least atm + let max: u64 = 
mach::sysctl_by_name(b"hw.cpufrequency_max\0"); + let freq: u64 = mach::sysctl_by_name(b"hw.cpufrequency\0"); + + let max = (max / 1000 * 1000) as u32; + let current = (freq / 1000 * 1000) as u32; + + misc_info.processor_max_mhz = max; + misc_info.processor_mhz_limit = max; + misc_info.processor_current_mhz = current; + + info_section.set_value(buffer, misc_info)?; + + Ok(dirent) + } +} diff --git a/third_party/rust/minidump-writer/src/mac/streams/module_list.rs b/third_party/rust/minidump-writer/src/mac/streams/module_list.rs new file mode 100644 index 0000000000..2b4d13ea74 --- /dev/null +++ b/third_party/rust/minidump-writer/src/mac/streams/module_list.rs @@ -0,0 +1,414 @@ +use super::*; + +struct ImageLoadInfo { + /// The preferred load address of the TEXT segment + vm_addr: u64, + /// The size of the TEXT segment + vm_size: u64, + /// The difference between the images preferred and actual load address + slide: isize, +} + +struct ImageDetails { + /// Unique identifier for the module + uuid: [u8; 16], + /// The load info for the image indicating the range of addresses it covers + load_info: ImageLoadInfo, + /// Path to the module on the local filesystem. Note that as of MacOS 11.0.1 + /// for system libraries, this path won't actually exist on the filesystem. + /// This data is more useful as human readable information in a minidump, + /// but is not required, as the real identifier is the UUID + file_path: Option<String>, + /// Version information, not present for the main executable + version: Option<u32>, +} + +impl MinidumpWriter { + /// Writes the [`MDStreamType::ModuleListStream`] to the minidump, which is + /// the last of all loaded modules (images) in the process. 
+ /// + /// Notably, this includes the UUID of the image which is needed to look up + /// debug symbols for the module, as well as the address range covered by + /// the module to know which debug symbols are used to resolve which instruction + /// addresses + pub(crate) fn write_module_list( + &mut self, + buffer: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result<MDRawDirectory, WriterError> { + // The list of modules is pretty critical information, but there could + // still be useful information in the minidump without them if we can't + // retrieve them for some reason + let modules = self + .write_loaded_modules(buffer, dumper) + .unwrap_or_default(); + + let list_header = MemoryWriter::<u32>::alloc_with_val(buffer, modules.len() as u32)?; + + let mut dirent = MDRawDirectory { + stream_type: MDStreamType::ModuleListStream as u32, + location: list_header.location(), + }; + + if !modules.is_empty() { + let mapping_list = MemoryArrayWriter::<MDRawModule>::alloc_from_iter(buffer, modules)?; + dirent.location.data_size += mapping_list.location().data_size; + } + + Ok(dirent) + } + + fn write_loaded_modules( + &self, + buf: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result<Vec<MDRawModule>, WriterError> { + let (all_images_info, mut images) = dumper.read_images()?; + + // Apparently MacOS will happily list the same image multiple times + // for some reason, so sort the images by load address and remove all + // of the duplicates + images.sort(); + images.dedup(); + + let mut modules = Vec::with_capacity(images.len()); + + for image in images { + if let Ok(image_details) = self.read_image(image, dumper) { + let is_main_executable = image_details.version.is_none(); + + if let Ok(module) = self.write_module(image_details, buf) { + // We want to keep the modules sorted by their load address except + // in the case of the main executable image which we want to put + // first, as it is most likely the culprit, or at least generally + // the most interesting module for 
human and machine inspectors + if is_main_executable { + modules.insert(0, module); + } else { + modules.push(module) + }; + } + } + } + + if !modules + .get(0) + .map(|rm| rm.version_info.signature != format::VS_FFI_SIGNATURE) + .unwrap_or_default() + { + Err(TaskDumpError::NoExecutableImage.into()) + } else { + // Crashpad also has code for loading the dyld info from the all images + // array above, but AFAICT (and from crashpad's own comments) this will + // never actually happen. It's more robust in the face of changes from + // Apple, which considering their penchant for changings things often + // and not actually documenting anything, is fair, but if that ever + // happens we can just...change the code. + if let Ok(dyld_image) = self.read_dyld(&all_images_info, dumper) { + if let Ok(module) = self.write_module(dyld_image, buf) { + modules.push(module); + } + } + + Ok(modules) + } + } + + /// Obtains important image metadata by traversing the image's load commands + /// + /// # Errors + /// + /// The image's load commands cannot be traversed, or a required load command + /// is missing + fn read_image( + &self, + image: ImageInfo, + dumper: &TaskDumper, + ) -> Result<ImageDetails, TaskDumpError> { + let mut load_info = None; + let mut version = None; + let mut uuid = None; + + { + let load_commands = dumper.read_load_commands(&image)?; + + for lc in load_commands.iter() { + match lc { + mach::LoadCommand::Segment(seg) if load_info.is_none() => { + if &seg.segment_name[..7] == b"__TEXT\0" { + let slide = image.load_address as isize - seg.vm_addr as isize; + + load_info = Some(ImageLoadInfo { + vm_addr: seg.vm_addr, + vm_size: seg.vm_size, + slide, + }); + } + } + mach::LoadCommand::Dylib(dylib) if version.is_none() => { + version = Some(dylib.dylib.current_version); + } + mach::LoadCommand::Uuid(img_id) if uuid.is_none() => { + uuid = Some(img_id.uuid); + } + _ => {} + } + + if load_info.is_some() && version.is_some() && uuid.is_some() { + break; + } + } + } + 
+ let load_info = load_info.ok_or(TaskDumpError::MissingLoadCommand { + name: "LC_SEGMENT_64", + id: mach::LoadCommandKind::Segment, + })?; + let uuid = uuid.ok_or(TaskDumpError::MissingLoadCommand { + name: "LC_UUID", + id: mach::LoadCommandKind::Uuid, + })?; + + let file_path = if image.file_path != 0 { + dumper + .read_string(image.file_path, None) + .unwrap_or_default() + } else { + None + }; + + Ok(ImageDetails { + uuid, + load_info, + file_path, + version, + }) + } + + /// Reads the image details for the dynamic linker (dyld), which is similar to + /// [`Self::read_image`], but the load address comes from the all images info + /// and the file path is taken from the image's dylinker load command, as no + /// path address is available for it + /// + /// # Errors + /// + /// The image's load commands cannot be read, or the `__TEXT` segment or UUID + /// load command is missing + fn read_dyld( + &self, + all_images: &task_dumper::AllImagesInfo, + dumper: &TaskDumper, + ) -> Result<ImageDetails, TaskDumpError> { + let image = ImageInfo { + load_address: all_images.dyld_image_load_address, + file_path: 0, + file_mod_date: 0, + }; + + let mut load_info = None; + let mut version = None; + let mut uuid = None; + let mut file_path = None; + + { + let load_commands = dumper.read_load_commands(&image)?; + + for lc in load_commands.iter() { + match lc { + mach::LoadCommand::Segment(seg) if load_info.is_none() => { + if &seg.segment_name[..7] == b"__TEXT\0" { + let slide = image.load_address as isize - seg.vm_addr as isize; + + load_info = Some(ImageLoadInfo { + vm_addr: seg.vm_addr, + vm_size: seg.vm_size, + slide, + }); + } + } + mach::LoadCommand::Dylib(dylib) if version.is_none() => { + version = Some(dylib.dylib.current_version); + } + mach::LoadCommand::Uuid(img_id) if uuid.is_none() => { + uuid = Some(img_id.uuid); + } + mach::LoadCommand::DylinkerCommand(dy_cmd) if file_path.is_none() => { + file_path = Some(dy_cmd.name.to_owned()); + } + _ => {} + } + + if load_info.is_some() && version.is_some() && uuid.is_some() && file_path.is_some() + { + break; + } + } + } + + let load_info = load_info.ok_or(TaskDumpError::MissingLoadCommand { + name: "LC_SEGMENT_64", + id: mach::LoadCommandKind::Segment, + })?; + let uuid = uuid.ok_or(TaskDumpError::MissingLoadCommand { + name: "LC_UUID", + id: mach::LoadCommandKind::Uuid, + })?; 
+ + Ok(ImageDetails { + uuid, + load_info, + file_path, + version, + }) + } + + fn write_module( + &self, + image: ImageDetails, + buf: &mut DumpBuf, + ) -> Result<MDRawModule, WriterError> { + let file_path = image.file_path.as_deref().unwrap_or_default(); + let module_name = write_string_to_location(buf, file_path)?; + + let mut raw_module = MDRawModule { + base_of_image: (image.load_info.vm_addr as isize + image.load_info.slide) as u64, + size_of_image: image.load_info.vm_size as u32, + module_name_rva: module_name.rva, + ..Default::default() + }; + + // Version info is not available for the main executable image since + // it doesn't issue a LC_ID_DYLIB load command + if let Some(version) = image.version { + raw_module.version_info.signature = format::VS_FFI_SIGNATURE; + raw_module.version_info.struct_version = format::VS_FFI_STRUCVERSION; + + // Convert MAC dylib version format, which is a 32 bit number, to the + // format used by minidump. + raw_module.version_info.file_version_hi = version >> 16; + raw_module.version_info.file_version_lo = ((version & 0xff00) << 8) | (version & 0xff); + } + + let module_name = if let Some(sep_index) = file_path.rfind('/') { + &file_path[sep_index + 1..] 
+ } else if file_path.is_empty() { + "<Unknown>" + } else { + file_path + }; + + #[derive(scroll::Pwrite, scroll::SizeWith)] + struct CvInfoPdb { + cv_signature: u32, + signature: format::GUID, + age: u32, + } + + let cv = MemoryWriter::alloc_with_val( + buf, + CvInfoPdb { + cv_signature: format::CvSignature::Pdb70 as u32, + age: 0, + signature: image.uuid.into(), + }, + )?; + + // Note that we don't use write_string_to_location here as the module + // name is a simple 8-bit string, not 16-bit like most other strings + // in the minidump, and is directly part of the record itself, not an rva + buf.write_all(module_name.as_bytes()); + buf.write_all(&[0]); // null terminator + + let mut cv_location = cv.location(); + cv_location.data_size += module_name.len() as u32 + 1; + raw_module.cv_record = cv_location; + + Ok(raw_module) + } +} + +#[cfg(test)] +// The libc functions used here are all marked as deprecated, saying you +// should use the mach2 crate, however, the mach2 crate does not expose +// any of these functions so... +#[allow(deprecated)] +mod test { + use super::*; + + // This function isn't declared in libc nor mach2. And is also undocumented + // by apple, I know, SHOCKING + extern "C" { + fn getsegmentdata( + header: *const libc::mach_header, + segname: *const u8, + size: &mut u64, + ) -> *const u8; + } + + /// Tests that the images we write as modules to the minidump are consistent + /// with those reported by the kernel. 
The kernel function used as the source + /// of truth can only be used to obtain info for the current process, which + /// is why they aren't used in the actual implementation as we want to handle + /// both the local and intra-process scenarios + #[test] + fn images_match() { + let mdw = MinidumpWriter::new(None, None); + let td = TaskDumper::new(mdw.task); + + let (all_images, images) = td.read_images().unwrap(); + + let actual_image_count = unsafe { libc::_dyld_image_count() } as u32; + + assert_eq!(actual_image_count, images.len() as u32); + + for index in 0..actual_image_count { + let expected_img_hdr = unsafe { libc::_dyld_get_image_header(index) }; + + let actual_img = &images[index as usize]; + + assert_eq!(actual_img.load_address, expected_img_hdr as u64); + + let mut expect_segment_size = 0; + let expect_segment_data = unsafe { + getsegmentdata( + expected_img_hdr, + b"__TEXT\0".as_ptr(), + &mut expect_segment_size, + ) + }; + + let actual_img_details = mdw + .read_image(*actual_img, &td) + .expect("failed to get image details"); + + let expected_image_name = + unsafe { std::ffi::CStr::from_ptr(libc::_dyld_get_image_name(index)) }; + + let expected_slide = unsafe { libc::_dyld_get_image_vmaddr_slide(index) }; + assert_eq!( + expected_slide, actual_img_details.load_info.slide, + "image {index}({expected_image_name:?}) slide is incorrect" + ); + + // The segment pointer has already been adjusted by the slide + assert_eq!( + expect_segment_data as u64, + (actual_img_details.load_info.vm_addr as isize + actual_img_details.load_info.slide) + as u64, + "image {index}({expected_image_name:?}) TEXT address is incorrect" + ); + assert_eq!( + expect_segment_size, actual_img_details.load_info.vm_size, + "image {index}({expected_image_name:?}) TEXT size is incorrect" + ); + + assert_eq!( + expected_image_name.to_str().unwrap(), + actual_img_details.file_path.unwrap() + ); + } + + let dyld = mdw + .read_dyld(&all_images, &td) + .expect("failed to read dyld"); + + // 
If the user overrides the dynamic linker and runs this test it will + // fail, but that's kind of on you, person reading this comment wondering + // why the test fails. Or Apple changed the path in whatever MacOS version + // in which case, please file a PR! + assert_eq!("/usr/lib/dyld", dyld.file_path.as_deref().unwrap()); + assert!(dyld.load_info.vm_size > 0); + } +} diff --git a/third_party/rust/minidump-writer/src/mac/streams/system_info.rs b/third_party/rust/minidump-writer/src/mac/streams/system_info.rs new file mode 100644 index 0000000000..aac2de573f --- /dev/null +++ b/third_party/rust/minidump-writer/src/mac/streams/system_info.rs @@ -0,0 +1,200 @@ +use super::*; +use crate::minidump_format::*; + +/// Retrieve the OS version information. +/// +/// Note that this only works on 10.13.4+, but that release is over 4 years old +/// and 1 version behind the latest unsupported release at the time of this writing +/// +/// Note that Breakpad/Crashpad use a private API in CoreFoundation to do this +/// via _CFCopySystemVersionDictionary->_kCFSystemVersionProductVersionKey +fn os_version() -> (u32, u32, u32) { + let vers = mach::sysctl_string(b"kern.osproductversion\0"); + + let inner = || { + let mut it = vers.split('.'); + + let major: u32 = it.next()?.parse().ok()?; + let minor: u32 = it.next()?.parse().ok()?; + let patch: u32 = it.next().and_then(|p| p.parse().ok()).unwrap_or_default(); + + Some((major, minor, patch)) + }; + + inner().unwrap_or_default() +} + +/// Retrieves the OS build version. +/// +/// Note that Breakpad/Crashpad use a private API in CoreFoundation to do this +/// via _CFCopySystemVersionDictionary->_kCFSystemVersionBuildVersionKey. 
I have +/// no idea how long this has been the case, but the same information can be +/// retrieved via `sysctlbyname` via the `kern.osversion` key as seen by comparing +/// its value versus the output of the `sw_vers -buildVersion` command +#[inline] +fn build_version() -> String { + mach::sysctl_string(b"kern.osversion\0") +} + +/// Retrieves more detailed information on the cpu. +/// +/// Note that this function is only implemented on `x86_64` as Apple doesn't +/// expose similar info on `aarch64` (or at least, not via the same mechanisms) +fn read_cpu_info(cpu: &mut format::CPU_INFORMATION) { + if !cfg!(target_arch = "x86_64") { + return; + } + + let mut md_feats: u64 = 1 << 2 /*PF_COMPARE_EXCHANGE_DOUBLE*/; + let features: u64 = mach::sysctl_by_name(b"machdep.cpu.feature_bits\0"); + + // Map the cpuid feature to its equivalent minidump cpu feature. + // See https://en.wikipedia.org/wiki/CPUID for where the values for the + // various cpuid bits come from, and + // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent + // for where the bits for the the minidump come from + macro_rules! 
map_feature { + ($set:expr, $cpuid_bit:expr, $md_bit:expr) => { + if $set & (1 << $cpuid_bit) != 0 { + md_feats |= 1 << $md_bit; + } + }; + } + + map_feature!( + features, 4, /*TSC*/ + 8 /* PF_RDTSC_INSTRUCTION_AVAILABLE */ + ); + map_feature!(features, 6 /*PAE*/, 9 /* PF_PAE_ENABLED */); + map_feature!( + features, 23, /*MMX*/ + 3 /* PF_MMX_INSTRUCTIONS_AVAILABLE */ + ); + map_feature!( + features, 25, /*SSE*/ + 6 /* PF_XMMI_INSTRUCTIONS_AVAILABLE */ + ); + map_feature!( + features, 26, /*SSE2*/ + 10 /* PF_XMMI64_INSTRUCTIONS_AVAILABLE */ + ); + map_feature!( + features, 32, /*SSE3*/ + 13 /* PF_SSE3_INSTRUCTIONS_AVAILABLE */ + ); + map_feature!( + features, 45, /*CX16*/ + 14 /* PF_COMPARE_EXCHANGE128 */ + ); + map_feature!(features, 58 /*XSAVE*/, 17 /* PF_XSAVE_ENABLED */); + map_feature!( + features, 62, /*RDRAND*/ + 28 /* PF_RDRAND_INSTRUCTION_AVAILABLE */ + ); + + let ext_features: u64 = mach::sysctl_by_name(b"machdep.cpu.extfeature_bits\0"); + + map_feature!( + ext_features, + 27, /* RDTSCP */ + 32 /* PF_RDTSCP_INSTRUCTION_AVAILABLE */ + ); + map_feature!( + ext_features, + 31, /* 3DNOW */ + 7 /* PF_3DNOW_INSTRUCTIONS_AVAILABLE */ + ); + + let leaf_features: u32 = mach::sysctl_by_name(b"machdep.cpu.leaf7_feature_bits\0"); + map_feature!( + leaf_features, + 0, /* F7_FSGSBASE */ + 22 /* PF_RDWRFSGSBASE_AVAILABLE */ + ); + + // In newer production kernels, NX is always enabled. + // See 10.15.0 xnu-6153.11.26/osfmk/x86_64/pmap.c nx_enabled. + md_feats |= 1 << 12 /* PF_NX_ENABLED */; + + // All CPUs that Apple is known to have shipped should support DAZ. + md_feats |= 1 << 11 /* PF_SSE_DAZ_MODE_AVAILABLE */; + + // minidump_common::format::OtherCpuInfo is just 2 adjacent u64's, we only + // set the first, so just do a direct write to the bytes + cpu.data[..std::mem::size_of::<u64>()].copy_from_slice(&md_feats.to_ne_bytes()); +} + +impl MinidumpWriter { + /// Writes the [`MDStreamType::SystemInfoStream`] stream. 
+ /// + /// On MacOS we include basic CPU information, though some of it is not + /// available on `aarch64` at the time of this writing, as well as kernel + /// version information. + pub(crate) fn write_system_info( + &mut self, + buffer: &mut DumpBuf, + _dumper: &TaskDumper, + ) -> Result<MDRawDirectory, WriterError> { + let mut info_section = MemoryWriter::<MDRawSystemInfo>::alloc(buffer)?; + let dirent = MDRawDirectory { + stream_type: MDStreamType::SystemInfoStream as u32, + location: info_section.location(), + }; + + let number_of_processors: u8 = mach::int_sysctl_by_name(b"hw.ncpu\0"); + // SAFETY: POD buffer + let mut cpu: format::CPU_INFORMATION = unsafe { std::mem::zeroed() }; + read_cpu_info(&mut cpu); + + cfg_if::cfg_if! { + if #[cfg(target_arch = "x86_64")] { + let processor_architecture = MDCPUArchitecture::PROCESSOR_ARCHITECTURE_AMD64; + + // machdep.cpu.family and machdep.cpu.model already take the extended family + // and model IDs into account. See 10.9.2 xnu-2422.90.20/osfmk/i386/cpuid.c + // cpuid_set_generic_info(). 
+ let processor_level: u16 = mach::int_sysctl_by_name(b"machdep.cpu.family\0"); + let model: u8 = mach::int_sysctl_by_name(b"machdep.cpu.model\0"); + let stepping: u8 = mach::int_sysctl_by_name(b"machdep.cpu.stepping\0"); + + let processor_revision = ((model as u16) << 8) | stepping as u16; + } else if #[cfg(target_arch = "aarch64")] { + let processor_architecture = MDCPUArchitecture::PROCESSOR_ARCHITECTURE_ARM64_OLD; + + let family: u32 = mach::sysctl_by_name(b"hw.cpufamily\0"); + + // The upper 16 bits become the level and the lower 16 bits the revision. + // The mask must be parenthesized since `>>` binds tighter than `&`, + // otherwise both values end up as the lower 16 bits + let processor_level = ((family & 0xffff0000) >> 16) as u16; + let processor_revision = (family & 0x0000ffff) as u16; + } else { + compile_error!("unsupported target architecture"); + } + } + + let (major_version, minor_version, build_number) = os_version(); + let os_version_loc = write_string_to_location(buffer, &build_version())?; + + let info = MDRawSystemInfo { + // CPU + processor_architecture: processor_architecture as u16, + processor_level, + processor_revision, + number_of_processors, + cpu, + + // OS + platform_id: PlatformId::MacOs as u32, + product_type: 1, // VER_NT_WORKSTATION, could also be VER_NT_SERVER but...seriously? 
+ major_version, + minor_version, + build_number, + csd_version_rva: os_version_loc.rva, + + suite_mask: 0, + reserved2: 0, + }; + + info_section.set_value(buffer, info)?; + + Ok(dirent) + } +} diff --git a/third_party/rust/minidump-writer/src/mac/streams/thread_list.rs b/third_party/rust/minidump-writer/src/mac/streams/thread_list.rs new file mode 100644 index 0000000000..180bb2f665 --- /dev/null +++ b/third_party/rust/minidump-writer/src/mac/streams/thread_list.rs @@ -0,0 +1,219 @@ +use super::*; +use crate::minidump_cpu::RawContextCPU; + +impl MinidumpWriter { + /// Writes the [`MDStreamType::ThreadListStream`] which is an array of + /// [`minidump_common::format::MINIDUMP_THREAD`] + pub(crate) fn write_thread_list( + &mut self, + buffer: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result<MDRawDirectory, WriterError> { + let threads = self.threads(dumper); + + let list_header = MemoryWriter::<u32>::alloc_with_val(buffer, threads.len() as u32)?; + + let mut dirent = MDRawDirectory { + stream_type: MDStreamType::ThreadListStream as u32, + location: list_header.location(), + }; + + let mut thread_list = MemoryArrayWriter::<MDRawThread>::alloc_array(buffer, threads.len())?; + dirent.location.data_size += thread_list.location().data_size; + + for (i, tid) in threads.enumerate() { + let thread = self.write_thread(tid, buffer, dumper)?; + thread_list.set_value_at(buffer, thread, i)?; + } + + Ok(dirent) + } + + fn write_thread( + &mut self, + tid: u32, + buffer: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result<MDRawThread, WriterError> { + let mut thread = MDRawThread { + thread_id: tid, + suspend_count: 0, + priority_class: 0, + priority: 0, + teb: 0, + stack: MDMemoryDescriptor::default(), + thread_context: MDLocationDescriptor::default(), + }; + + let thread_state = dumper.read_thread_state(tid)?; + + self.write_stack_from_start_address(thread_state.sp(), &mut thread, buffer, dumper)?; + + let mut cpu: RawContextCPU = Default::default(); + 
Self::fill_cpu_context(&thread_state, &mut cpu); + let cpu_section = MemoryWriter::alloc_with_val(buffer, cpu)?; + thread.thread_context = cpu_section.location(); + Ok(thread) + } + + /// Copies the thread's stack memory, beginning at `start`, into `buffer` + /// and records where it was written in `thread.stack`. + /// + /// If the stack size is computed as 0, or the stack memory can't be read, + /// 16 bytes of a marker value are written instead and the marker also + /// replaces the start of the memory range. In every case the resulting + /// descriptor is appended to `self.memory_blocks`. + fn write_stack_from_start_address( + &mut self, + start: u64, + thread: &mut MDRawThread, + buffer: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result<(), WriterError> { + thread.stack.start_of_memory_range = start; + thread.stack.memory.data_size = 0; + thread.stack.memory.rva = buffer.position() as u32; + + let stack_size = self.calculate_stack_size(start, dumper); + + // In some situations the stack address for the thread can come back 0. + // In these cases we skip over the threads in question and stuff the + // stack with a clearly borked value. + // + // In other cases, notably a stack overflow, we might fail to read the + // stack eg. InvalidAddress in which case we use a different borked + // value to indicate the different failure + let stack_location = if stack_size != 0 { + dumper + .read_task_memory(start, stack_size) + .map(|stack_buffer| { + let stack_location = MDLocationDescriptor { + data_size: stack_buffer.len() as u32, + rva: buffer.position() as u32, + }; + buffer.write_all(&stack_buffer); + stack_location + }) + .ok() + } else { + None + }; + + thread.stack.memory = stack_location.unwrap_or_else(|| { + // 0xdeadbeef marks a stack whose size was 0, 0xdeaddead a stack + // that had a size but could not be read + let borked = if stack_size == 0 { + 0xdeadbeef + } else { + 0xdeaddead + }; + + thread.stack.start_of_memory_range = borked; + + // The marker has the same type as the u64 start of the memory range, + // so writing it twice accounts for the 16 bytes declared here + let stack_location = MDLocationDescriptor { + data_size: 16, + rva: buffer.position() as u32, + }; + buffer.write_all(&borked.to_ne_bytes()); + buffer.write_all(&borked.to_ne_bytes()); + stack_location + }); + + // Add the stack memory as a raw block of memory, this is written to + // the minidump as part of the memory list stream + self.memory_blocks.push(thread.stack); + Ok(()) + } + + /// Computes the number of bytes between `start_address` and the end of the + /// VM region(s) it lies within. + /// + /// Returns 0 if `start_address` is 0, if its region can't be retrieved, or + /// if the region that was found starts above `start_address`. + fn calculate_stack_size(&self, start_address: u64, dumper: &TaskDumper) -> usize { + if start_address == 0 { + return 0; + } + + let mut region = if let 
Ok(region) = dumper.get_vm_region(start_address) { + region + } else { + return 0; + }; + + // Failure or stack corruption, since mach_vm_region had to go + // higher in the process address space to find a valid region. + if start_address < region.range.start { + return 0; + } + + let root_range_start = region.range.start; + let mut stack_size = region.range.end - region.range.start; + + // If the user tag is VM_MEMORY_STACK, look for more readable regions with + // the same tag placed immediately above the computed stack region. Under + // some circumstances, the stack for thread 0 winds up broken up into + // multiple distinct abutting regions. This can happen for several reasons, + // including user code that calls setrlimit(RLIMIT_STACK, ...) or changes + // the access on stack pages by calling mprotect. + if region.info.user_tag == mach2::vm_statistics::VM_MEMORY_STACK { + loop { + let proposed_next_region_base = region.range.end; + + region = if let Ok(reg) = dumper.get_vm_region(region.range.end) { + reg + } else { + break; + }; + + // Stop at the first region that doesn't directly abut the previous + // one, isn't tagged as stack memory, or isn't readable + if region.range.start != proposed_next_region_base + || region.info.user_tag != mach2::vm_statistics::VM_MEMORY_STACK + || (region.info.protection & mach2::vm_prot::VM_PROT_READ) == 0 + { + break; + } + + stack_size += region.range.end - region.range.start; + } + } + + // Only the part from `start_address` up to the end of the last region + // that was accumulated is counted, not the part of the first region + // that lies below `start_address` + (root_range_start + stack_size - start_address) as usize + } + + /// Fills `out` with the register state held by `thread_state`, and sets the + /// context flags matching the architecture this is compiled for + pub(crate) fn fill_cpu_context( + thread_state: &crate::mac::mach::ThreadState, + out: &mut RawContextCPU, + ) { + let ts = thread_state.arch_state(); + + cfg_if::cfg_if! 
{ + if #[cfg(target_arch = "x86_64")] { + out.context_flags = format::ContextFlagsCpu::CONTEXT_AMD64.bits(); + + out.rax = ts.__rax; + out.rbx = ts.__rbx; + out.rcx = ts.__rcx; + out.rdx = ts.__rdx; + out.rdi = ts.__rdi; + out.rsi = ts.__rsi; + out.rbp = ts.__rbp; + out.rsp = ts.__rsp; + out.r8 = ts.__r8; + out.r9 = ts.__r9; + out.r10 = ts.__r10; + out.r11 = ts.__r11; + out.r12 = ts.__r12; + out.r13 = ts.__r13; + out.r14 = ts.__r14; + out.r15 = ts.__r15; + out.rip = ts.__rip; + // according to AMD's software developer guide, bits above 18 are + // not used in the flags register. Since the minidump format + // specifies 32 bits for the flags register, we can truncate safely + // with no loss. + out.eflags = ts.__rflags as _; + out.cs = ts.__cs as u16; + out.fs = ts.__fs as u16; + out.gs = ts.__gs as u16; + } else if #[cfg(target_arch = "aarch64")] { + // This is kind of a lie as we don't actually include the full float state..? + out.context_flags = format::ContextFlagsArm64Old::CONTEXT_ARM64_OLD_FULL.bits() as u64; + + out.cpsr = ts.cpsr; + out.iregs[..29].copy_from_slice(&ts.x[..29]); + out.iregs[29] = ts.fp; + out.iregs[30] = ts.lr; + out.sp = ts.sp; + out.pc = ts.pc; + } else { + compile_error!("unsupported target arch"); + } + } + } +} diff --git a/third_party/rust/minidump-writer/src/mac/streams/thread_names.rs b/third_party/rust/minidump-writer/src/mac/streams/thread_names.rs new file mode 100644 index 0000000000..42242a6397 --- /dev/null +++ b/third_party/rust/minidump-writer/src/mac/streams/thread_names.rs @@ -0,0 +1,79 @@ +use super::*; + +impl MinidumpWriter { + /// Writes the [`MDStreamType::ThreadNamesStream`] which is an array of + /// [`minidump_common::format::MINIDUMP_THREAD_NAME`] + pub(crate) fn write_thread_names( + &mut self, + buffer: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result<MDRawDirectory, WriterError> { + let threads = self.threads(dumper); + + let list_header = MemoryWriter::<u32>::alloc_with_val(buffer, threads.len() as u32)?; + + let 
mut dirent = MDRawDirectory { + stream_type: MDStreamType::ThreadNamesStream as u32, + location: list_header.location(), + }; + + let mut names = MemoryArrayWriter::<MDRawThreadName>::alloc_array(buffer, threads.len())?; + dirent.location.data_size += names.location().data_size; + + for (i, tid) in threads.enumerate() { + // It's unfortunate if we can't grab a thread name, but it's also + // not a critical failure, so an empty name is written in its place + let name_loc = match Self::write_thread_name(buffer, dumper, tid) { + Ok(loc) => loc, + Err(_err) => { + // TODO: log error + write_string_to_location(buffer, "")? + } + }; + + let thread = MDRawThreadName { + thread_id: tid, + thread_name_rva: name_loc.rva.into(), + }; + + names.set_value_at(buffer, thread, i)?; + } + + Ok(dirent) + } + + /// Attempts to retrieve and write the thread name, returning the thread + /// name's location if successful + /// + /// A name that is not valid utf-8 is written as an empty string. + /// + /// # Errors + /// + /// The thread info can't be retrieved, or writing the name to `buffer` fails + fn write_thread_name( + buffer: &mut Buffer, + dumper: &TaskDumper, + tid: u32, + ) -> Result<MDLocationDescriptor, WriterError> { + // As noted in usr/include/mach/thread_info.h, the THREAD_EXTENDED_INFO + // return is exactly the same as proc_pidinfo(..., proc_threadinfo) + impl mach::ThreadInfo for libc::proc_threadinfo { + const FLAVOR: u32 = 5; // THREAD_EXTENDED_INFO + } + + let thread_info: libc::proc_threadinfo = dumper.thread_info(tid)?; + + let name = std::str::from_utf8( + // SAFETY: This is an initialized block of static size + unsafe { + std::slice::from_raw_parts( + thread_info.pth_name.as_ptr().cast(), + thread_info.pth_name.len(), + ) + }, + ) + .unwrap_or_default(); + + // Ignore the null terminator and anything that follows it + let tname = match name.find('\0') { + Some(i) => &name[..i], + None => name, + }; + + Ok(write_string_to_location(buffer, tname)?) 
+ } +} diff --git a/third_party/rust/minidump-writer/src/mac/task_dumper.rs b/third_party/rust/minidump-writer/src/mac/task_dumper.rs new file mode 100644 index 0000000000..013d432d26 --- /dev/null +++ b/third_party/rust/minidump-writer/src/mac/task_dumper.rs @@ -0,0 +1,462 @@ +use crate::mac::mach; +use mach2::mach_types as mt; +use thiserror::Error; + +/// The errors that can occur when inspecting a task via [`TaskDumper`] +#[derive(Error, Debug)] +pub enum TaskDumpError { + /// A mach call returned something other than `KERN_SUCCESS`. `syscall` + /// is the name of the function that was called. + #[error("kernel error {syscall} {error}")] + Kernel { + syscall: &'static str, + error: mach::KernelError, + }, + /// An image header did not have the expected magic + #[error("detected an invalid mach image header")] + InvalidMachHeader, + /// A string read from the task was not utf-8 + #[error(transparent)] + NonUtf8String(#[from] std::string::FromUtf8Error), + /// None of the images loaded in the task was an executable + #[error("unable to find the main executable image for the process")] + NoExecutableImage, + #[error("expected load command {name}({id:?}) was not found for an image")] + MissingLoadCommand { + name: &'static str, + id: mach::LoadCommandKind, + }, +} + +/// Wraps a mach call in a Result +macro_rules! mach_call { + ($call:expr) => {{ + // SAFETY: syscall + let kr = unsafe { $call }; + if kr == mach::KERN_SUCCESS { + Ok(()) + } else { + // This is ugly, improvements to the macro welcome! 
+ // Only keep what precedes the argument list, ie. the function path + let mut syscall = stringify!($call); + if let Some(i) = syscall.find('(') { + syscall = &syscall[..i]; + } + Err(TaskDumpError::Kernel { + syscall, + error: kr.into(), + }) + } + }}; +} + +/// `dyld_all_image_infos` from <usr/include/mach-o/dyld_images.h> +/// +/// This struct is truncated as we only need a couple of fields at the beginning +/// of the struct +#[repr(C)] +#[derive(Copy, Clone)] +pub struct AllImagesInfo { + // VERSION 1 + pub version: u32, + /// The number of [`ImageInfo`] structs at the following address + info_array_count: u32, + /// The address in the process where the array of [`ImageInfo`] structs is + info_array_addr: u64, + /// A function pointer, unused + _notification: u64, + /// Unused + _process_detached_from_shared_region: bool, + // VERSION 2 + lib_system_initialized: bool, + // Note that crashpad adds a 32-bit int here to get proper alignment when + // building on 32-bit targets...but we explicitly don't care about 32-bit + // targets since Apple doesn't + pub dyld_image_load_address: u64, +} + +/// `dyld_image_info` from <usr/include/mach-o/dyld_images.h> +/// +/// Note that equality and ordering only take the load address into account, +/// the file path and modification date are ignored. +#[repr(C)] +#[derive(Debug, Clone, Copy)] +pub struct ImageInfo { + /// The address in the process where the image is loaded + pub load_address: u64, + /// The address in the process where the image's file path can be read + pub file_path: u64, + /// Timestamp for when the image's file was last modified + pub file_mod_date: u64, +} + +impl PartialEq for ImageInfo { + fn eq(&self, o: &Self) -> bool { + self.load_address == o.load_address + } +} + +impl Eq for ImageInfo {} + +impl Ord for ImageInfo { + fn cmp(&self, o: &Self) -> std::cmp::Ordering { + self.load_address.cmp(&o.load_address) + } +} + +impl PartialOrd for ImageInfo { + // Defers to `Ord` so that both orderings always agree + fn partial_cmp(&self, o: &Self) -> Option<std::cmp::Ordering> { + Some(self.cmp(o)) + } +} + +/// Describes a region of virtual memory +pub struct VMRegionInfo { + /// The information the kernel reported for the region + pub info: mach::vm_region_submap_info_64, + /// The addresses spanned by the region, from its base to base + size + pub range: std::ops::Range<u64>, +} + +/// 
Similarly to PtraceDumper for Linux, this provides access to information +/// for a task (MacOS process) +pub struct TaskDumper { + task: mt::task_t, + page_size: i64, +} + +impl TaskDumper { + /// Constructs a [`TaskDumper`] for the specified task + pub fn new(task: mt::task_t) -> Self { + Self { + task, + // SAFETY: syscall + page_size: unsafe { libc::sysconf(libc::_SC_PAGESIZE) } as i64, + } + } + + /// Reads a block of memory from the task + /// + /// `count` is the number of `T` elements, not bytes, to read starting at + /// `address`. The read itself is widened to whole pages, and only the + /// requested elements are copied into the returned vector. + /// + /// NOTE(review): the elements are produced by reinterpreting the bytes read + /// from the task, which presumably requires `T` to be plain data and + /// `address` to be suitably aligned for `T` — confirm against callers. + /// + /// # Errors + /// + /// The syscall to read the task's memory fails for some reason, eg bad address. + pub fn read_task_memory<T>(&self, address: u64, count: usize) -> Result<Vec<T>, TaskDumpError> + where + T: Sized + Clone, + { + // NOTE(review): neither this multiplication nor the additions below are + // checked for overflow + let length = (count * std::mem::size_of::<T>()) as u64; + + // use the negative of the page size for the mask to find the page address + let page_address = address & (-self.page_size as u64); + let last_page_address = + (address + length + (self.page_size - 1) as u64) & (-self.page_size as u64); + + // Note this is the size in bytes of all of the pages being read, which + // shadows the size of a single page stored in self + let page_size = last_page_address - page_address; + let mut local_start = 0; + let mut local_length = 0; + + mach_call!(mach::mach_vm_read( + self.task, + page_address, + page_size, + &mut local_start, + &mut local_length + ))?; + + let mut buffer = Vec::with_capacity(count); + + // SAFETY: this is safe as long as the kernel has not lied to us + let task_buffer = unsafe { + std::slice::from_raw_parts( + (local_start as *const u8) + .offset((address - page_address) as isize) + .cast(), + count, + ) + }; + buffer.extend_from_slice(task_buffer); + + // Don't worry about the return here, if something goes wrong there's probably + // not much we can do about it, and we have what we want anyways + let _res = mach_call!(mach::mach_vm_deallocate( + mach::mach_task_self(), + local_start as u64, // vm_read returns a pointer, but vm_deallocate takes an integer address :-/ + local_length as u64, // vm_read and vm_deallocate use different sizes :-/ + )); + + Ok(buffer) + } + + /// Reads a null terminated 
string starting at the specified address. This + /// is a specialization of [`Self::read_task_memory`] since strings can span VM + /// regions. + /// + /// If not specified, the string is capped at 8k which should never be close + /// to being hit in normal scenarios, at least for "system" strings, which is + /// all this interface is used to retrieve + /// + /// Returns `Ok(None)` if the VM region containing `addr` can't be retrieved. + /// + /// # Errors + /// + /// Fails if the address cannot be read for some reason, or the string is + /// not utf-8. + pub fn read_string( + &self, + addr: u64, + expected_size: Option<usize>, + ) -> Result<Option<String>, TaskDumpError> { + // The problem is we don't know how much to read until we know how long + // the string is. And we don't know how long the string is, until we've read + // the memory! So, we'll try to read up to the cap on the string length + // (or as many bytes as we can until we reach the end of the vm region). + let get_region_size = || -> Result<u64, TaskDumpError> { + let region = self.get_vm_region(addr)?; + + let mut size_to_end = region.range.end - addr; + + // If the remaining is less than 4k, check if the next region is + // contiguous, and extend the memory that could contain the string + // to include it. Failing to retrieve the next region is not an + // error, the string can still fit in the region we already have + if size_to_end < 4 * 1024 { + if let Ok(maybe_adjacent) = self.get_vm_region(region.range.end) { + if maybe_adjacent.range.start == region.range.end { + size_to_end += maybe_adjacent.range.end - maybe_adjacent.range.start; + } + } + } + + Ok(size_to_end) + }; + + if let Ok(size_to_end) = get_region_size() { + let mut bytes = self.read_task_memory( + addr, + std::cmp::min(size_to_end as usize, expected_size.unwrap_or(8 * 1024)), + )?; + + // Find the null terminator and truncate our string + if let Some(null_pos) = bytes.iter().position(|c| *c == 0) { + bytes.resize(null_pos, 0); + } + + Ok(String::from_utf8(bytes).map(Some)?) + } else { + Ok(None) + } + } + + /// Retrieves information on the virtual memory region the specified address + /// is located within. 
+ /// + /// # Errors + /// + /// The syscall to retrieve the VM region information fails for some reason, + /// eg. a bad address. + pub fn get_vm_region(&self, addr: u64) -> Result<VMRegionInfo, TaskDumpError> { + // The base and size are in/out parameters, on success they describe + // the region that was found + let mut region_base = addr; + let mut region_size = 0; + let mut nesting_level = 0; + let mut submap_info = std::mem::MaybeUninit::<mach::vm_region_submap_info_64>::uninit(); + + // <usr/include/mach/vm_region.h> + // + // The count is the size of the info struct in 32-bit words + const VM_REGION_SUBMAP_INFO_COUNT_64: u32 = + (std::mem::size_of::<mach::vm_region_submap_info_64>() / std::mem::size_of::<u32>()) + as u32; + + let mut info_count = VM_REGION_SUBMAP_INFO_COUNT_64; + + mach_call!(mach::mach_vm_region_recurse( + self.task, + &mut region_base, + &mut region_size, + &mut nesting_level, + submap_info.as_mut_ptr().cast(), + &mut info_count, + ))?; + + Ok(VMRegionInfo { + // SAFETY: this will be valid if the syscall succeeded + info: unsafe { submap_info.assume_init() }, + range: region_base..region_base + region_size, + }) + } + + /// Retrieves the state of the specified thread. The state is an architecture + /// specific block of CPU context ie register state. + /// + /// # Errors + /// + /// The specified thread id is invalid, or the thread is in a task that is + /// compiled for a different architecture than this local task. + pub fn read_thread_state(&self, tid: u32) -> Result<mach::ThreadState, TaskDumpError> { + let mut thread_state = mach::ThreadState::default(); + + mach_call!(mach::thread_get_state( + tid, + mach::THREAD_STATE_FLAVOR as i32, + thread_state.state.as_mut_ptr(), + &mut thread_state.state_size, + ))?; + + Ok(thread_state) + } + + /// Reads the specified task information. + /// + /// # Errors + /// + /// The syscall to receive the task information failed for some reason, eg. + /// the specified type and the flavor are mismatched and considered invalid. 
+ pub fn task_info<T: mach::TaskInfo>(&self) -> Result<T, TaskDumpError> { + let mut info = std::mem::MaybeUninit::<T>::uninit(); + // The count is the size of the info struct in 32-bit words + let mut count = (std::mem::size_of::<T>() / std::mem::size_of::<u32>()) as u32; + + mach_call!(mach::task::task_info( + self.task, + T::FLAVOR, + info.as_mut_ptr().cast(), + &mut count + ))?; + + // SAFETY: this will be initialized if the call succeeded + unsafe { Ok(info.assume_init()) } + } + + /// Reads the specified information for the thread `tid`. + /// + /// # Errors + /// + /// The syscall to receive the thread information failed for some reason, eg. + /// the specified type and the flavor are mismatched and considered invalid, + /// or the thread no longer exists + pub fn thread_info<T: mach::ThreadInfo>(&self, tid: u32) -> Result<T, TaskDumpError> { + let mut thread_info = std::mem::MaybeUninit::<T>::uninit(); + // The count is the size of the info struct in 32-bit words + let mut count = (std::mem::size_of::<T>() / std::mem::size_of::<u32>()) as u32; + + mach_call!(mach::thread_info( + tid, + T::FLAVOR, + thread_info.as_mut_ptr().cast(), + &mut count, + ))?; + + // SAFETY: this will be initialized if the call succeeded + unsafe { Ok(thread_info.assume_init()) } + } + + /// Retrieves all of the images loaded in the task. + /// + /// Note that there may be multiple images with the same load address. + /// + /// # Errors + /// + /// The syscall to retrieve the location of the loaded images fails, or + /// the syscall to read the loaded images from the process memory fails + pub fn read_images(&self) -> Result<(AllImagesInfo, Vec<ImageInfo>), TaskDumpError> { + impl mach::TaskInfo for mach::task_info::task_dyld_info { + const FLAVOR: u32 = mach::task_info::TASK_DYLD_INFO; + } + + // Retrieve the address at which the list of loaded images is located + // within the task + let all_images_addr = { + let dyld_info = self.task_info::<mach::task_info::task_dyld_info>()?; + dyld_info.all_image_info_addr + }; + + // Here we make the assumption that dyld loaded at the same address in + // the crashed process vs. 
this one. This is an assumption made in + // "dyld_debug.c" and is said to be nearly always valid. + let dyld_all_info_buf = + self.read_task_memory::<u8>(all_images_addr, std::mem::size_of::<AllImagesInfo>())?; + // SAFETY: the buffer holds exactly `size_of::<AllImagesInfo>()` bytes, and + // this is fine as long as the kernel isn't lying to us. The struct is + // copied out with an unaligned read since a byte buffer doesn't + // guarantee the alignment that a reference to it would require + let all_images_info: AllImagesInfo = + unsafe { std::ptr::read_unaligned(dyld_all_info_buf.as_ptr().cast()) }; + + let images = self.read_task_memory::<ImageInfo>( + all_images_info.info_array_addr, + all_images_info.info_array_count as usize, + )?; + + Ok((all_images_info, images)) + } + + /// Retrieves the main executable image for the task. + /// + /// Note that this method is currently only used for tests due to deficiencies + /// in `otool` + /// + /// # Errors + /// + /// Any of the errors that apply to [`Self::read_images`] apply here, in + /// addition to not being able to find the main executable image + pub fn read_executable_image(&self) -> Result<ImageInfo, TaskDumpError> { + let (_, images) = self.read_images()?; + + // The main executable is the first image whose header has the + // MH_EXECUTE file type + for img in images { + let mach_header = self.read_task_memory::<mach::MachHeader>(img.load_address, 1)?; + + let header = &mach_header[0]; + + if header.magic != mach::MH_MAGIC_64 { + return Err(TaskDumpError::InvalidMachHeader); + } + + if header.file_type == mach::MH_EXECUTE { + return Ok(img); + } + } + + Err(TaskDumpError::NoExecutableImage) + } + + /// Retrieves the load commands for the specified image + /// + /// # Errors + /// + /// We fail to read the image header for the specified image, the header we + /// read is determined to be invalid, or we fail to read the block of memory + /// containing the load commands themselves. 
+ pub fn read_load_commands(&self, img: &ImageInfo) -> Result<mach::LoadCommands, TaskDumpError> { + let mach_header = self.read_task_memory::<mach::MachHeader>(img.load_address, 1)?; + + let header = &mach_header[0]; + + if header.magic != mach::MH_MAGIC_64 { + return Err(TaskDumpError::InvalidMachHeader); + } + + // Read the load commands which immediately follow the image header from + // the task memory. Note that load commands vary in size so we need to + // retrieve the memory as a raw byte buffer that we can then iterate + // through and step according to the size of each load command + let load_commands_buf = self.read_task_memory::<u8>( + img.load_address + std::mem::size_of::<mach::MachHeader>() as u64, + header.size_commands as usize, + )?; + + Ok(mach::LoadCommands { + buffer: load_commands_buf, + count: header.num_commands, + }) + } + + /// Gets a list of all of the thread ids in the task + /// + /// NOTE(review): the returned slice points at the array that was filled in + /// by `task_threads`, and nothing in this method releases it, which is + /// presumably why it can be handed out as `'static` — confirm whether the + /// array is meant to be deallocated by someone. + /// + /// # Errors + /// + /// The syscall to retrieve the list of threads fails + pub fn read_threads(&self) -> Result<&'static [u32], TaskDumpError> { + let mut threads = std::ptr::null_mut(); + let mut thread_count = 0; + + mach_call!(mach::task_threads( + self.task, + &mut threads, + &mut thread_count + ))?; + + Ok( + // SAFETY: This should be valid if the call succeeded + unsafe { std::slice::from_raw_parts(threads, thread_count as usize) }, + ) + } + + /// Retrieves the PID for the task + /// + /// # Errors + /// + /// Presumably the only way this would fail would be if the task we are + /// dumping disappears. + pub fn pid_for_task(&self) -> Result<i32, TaskDumpError> { + let mut pid = 0; + mach_call!(mach::pid_for_task(self.task, &mut pid))?; + Ok(pid) + } +} |