summaryrefslogtreecommitdiffstats
path: root/third_party/rust/minidump-writer/src/mac/task_dumper.rs
blob: 013d432d267ccba8b1a1263cc211bb322b77a5bb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
use crate::mac::mach;
use mach2::mach_types as mt;
use thiserror::Error;

#[derive(Error, Debug)]
pub enum TaskDumpError {
    #[error("kernel error {syscall} {error})")]
    Kernel {
        syscall: &'static str,
        error: mach::KernelError,
    },
    #[error("detected an invalid mach image header")]
    InvalidMachHeader,
    #[error(transparent)]
    NonUtf8String(#[from] std::string::FromUtf8Error),
    #[error("unable to find the main executable image for the process")]
    NoExecutableImage,
    #[error("expected load command {name}({id:?}) was not found for an image")]
    MissingLoadCommand {
        name: &'static str,
        id: mach::LoadCommandKind,
    },
}

/// Evaluates a mach call and turns its return code into a `Result`
///
/// `KERN_SUCCESS` becomes `Ok(())`. Any other code becomes a
/// [`TaskDumpError::Kernel`] that carries the text of the call expression up to
/// (but not including) its first opening parenthesis.
macro_rules! mach_call {
    ($call:expr) => {{
        // SAFETY: syscall
        let kr = unsafe { $call };
        if kr != mach::KERN_SUCCESS {
            // Only the function path is interesting in the error, so drop the
            // argument list from the stringified expression
            let call_text: &'static str = stringify!($call);
            let syscall = call_text.split('(').next().unwrap_or(call_text);
            Err(TaskDumpError::Kernel {
                syscall,
                error: kr.into(),
            })
        } else {
            Ok(())
        }
    }};
}

/// `dyld_all_image_infos` from <usr/include/mach-o/dyld_images.h>
///
/// This struct is truncated as we only need a couple of fields at the beginning
/// of the struct.
///
/// The layout is `#[repr(C)]` because [`TaskDumper::read_images`] fills it in by
/// reinterpreting bytes read from the target task, so fields must not be
/// reordered, resized, or removed.
#[repr(C)]
#[derive(Copy, Clone)]
pub struct AllImagesInfo {
    // VERSION 1
    /// Version number of the structure as read from the target process
    /// (presumably indicates which of the `VERSION n` field groups are valid —
    /// TODO confirm against dyld_images.h)
    pub version: u32,
    /// The number of [`ImageInfo`] structs at the following address
    info_array_count: u32,
    /// The address in the process where the array of [`ImageInfo`] structs is
    info_array_addr: u64,
    /// A function pointer, unused
    _notification: u64,
    /// Unused
    _process_detached_from_shared_region: bool,
    // VERSION 2
    /// Not read anywhere in this file. NOTE(review): presumably set once
    /// libSystem has been initialized in the process — confirm against
    /// dyld_images.h
    lib_system_initialized: bool,
    // Note that crashpad adds a 32-bit int here to get proper alignment when
    // building on 32-bit targets...but we explicitly don't care about 32-bit
    // targets since Apple doesn't
    /// NOTE(review): presumably the address at which dyld itself is loaded in
    /// the process — confirm against dyld_images.h
    pub dyld_image_load_address: u64,
}

/// `dyld_image_info` from <usr/include/mach-o/dyld_images.h>
///
/// Values of this type are read directly out of the target task's memory by
/// [`TaskDumper::read_images`], hence the `#[repr(C)]` layout.
///
/// Note that equality and ordering for this type only take
/// [`Self::load_address`] into account; the other fields are ignored.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct ImageInfo {
    /// The address in the process where the image is loaded
    pub load_address: u64,
    /// The address in the process where the image's file path can be read
    pub file_path: u64,
    /// Timestamp for when the image's file was last modified
    pub file_mod_date: u64,
}

/// Two images are considered equal when they share a load address, regardless
/// of their path or modification date
impl PartialEq for ImageInfo {
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (self.load_address, other.load_address);
        lhs == rhs
    }
}

// Equality compares the integer `load_address` fields, which is a total
// equivalence relation, so the `Eq` marker can be implemented as well.
impl Eq for ImageInfo {}

/// Images are ordered by their load address only
impl Ord for ImageInfo {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        u64::cmp(&self.load_address, &other.load_address)
    }
}

/// The partial ordering always exists and simply defers to the total ordering
impl PartialOrd for ImageInfo {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        let total = Ord::cmp(self, other);
        Some(total)
    }
}

/// Describes a region of virtual memory, as produced by
/// [`TaskDumper::get_vm_region`]
pub struct VMRegionInfo {
    /// The region information filled in by `mach_vm_region_recurse`
    pub info: mach::vm_region_submap_info_64,
    /// The addresses covered by the region: from the base address reported by
    /// the kernel up to (but excluding) that base plus the reported size
    pub range: std::ops::Range<u64>,
}

/// Similarly to PtraceDumper for Linux, this provides access to information
/// for a task (MacOS process)
pub struct TaskDumper {
    /// The task that all of the queries are issued against
    task: mt::task_t,
    /// The page size queried via `sysconf(_SC_PAGESIZE)` at construction. It is
    /// stored signed so that it can be negated to build a page address mask.
    page_size: i64,
}

impl TaskDumper {
    /// Creates a [`TaskDumper`] that inspects the specified task
    ///
    /// The page size is queried once here and cached for later memory reads.
    pub fn new(task: mt::task_t) -> Self {
        // SAFETY: syscall
        let raw_page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };
        let page_size = raw_page_size as i64;

        Self { task, page_size }
    }

    /// Copies `count` values of type `T` out of the task's memory, starting at
    /// `address`
    ///
    /// The requested range is widened to whole pages before it is handed to
    /// `mach_vm_read`. The elements are then cloned out of the buffer the kernel
    /// returned, and that buffer is released again before returning.
    ///
    /// # Errors
    ///
    /// The syscall to read the task's memory fails for some reason, eg bad address.
    pub fn read_task_memory<T>(&self, address: u64, count: usize) -> Result<Vec<T>, TaskDumpError>
    where
        T: Sized + Clone,
    {
        let byte_len = (count * std::mem::size_of::<T>()) as u64;

        // The negated page size is a mask that clears the offset-within-page bits
        let page_mask = (-self.page_size) as u64;
        let round_up = (self.page_size - 1) as u64;

        let first_page = address & page_mask;
        let end_page = (address + byte_len + round_up) & page_mask;

        // Number of bytes, in whole pages, that have to be read
        let span = end_page - first_page;
        let mut local_start = 0;
        let mut local_length = 0;

        mach_call!(mach::mach_vm_read(
            self.task,
            first_page,
            span,
            &mut local_start,
            &mut local_length
        ))?;

        // The requested data starts this many bytes into the returned buffer
        let offset_in_page = (address - first_page) as isize;

        // SAFETY: this is safe as long as the kernel has not lied to us
        let task_slice: &[T] = unsafe {
            let first_elem = (local_start as *const u8).offset(offset_in_page);
            std::slice::from_raw_parts(first_elem.cast(), count)
        };
        let buffer = task_slice.to_vec();

        // Don't worry about the return here, if something goes wrong there's probably
        // not much we can do about it, and we have what we want anyways
        let _res = mach_call!(mach::mach_vm_deallocate(
            mach::mach_task_self(),
            local_start as u64, // vm_read returns a pointer, but vm_deallocate takes a integer address :-/
            local_length as u64, // vm_read and vm_deallocate use different sizes :-/
        ));

        Ok(buffer)
    }

    /// Reads a null terminated string starting at the specified address. This
    /// is a specialization of [`Self::read_task_memory`] since strings can span
    /// VM regions.
    ///
    /// At most `expected_size` bytes are read. If not specified, the string is
    /// capped at 8k which should never be close to being hit in normal
    /// scenarios, at least for "system" strings, which is all this interface is
    /// used to retrieve. If no null terminator is found within the bytes that
    /// were read, all of them are returned as the string.
    ///
    /// Returns `Ok(None)` if the VM region containing `addr` could not be
    /// determined.
    ///
    /// # Errors
    ///
    /// Fails if the address cannot be read for some reason, or the string is
    /// not utf-8.
    pub fn read_string(
        &self,
        addr: u64,
        expected_size: Option<usize>,
    ) -> Result<Option<String>, TaskDumpError> {
        // The problem is we don't know how much to read until we know how long
        // the string is. And we don't know how long the string is, until we've read
        // the memory!  So, we'll try to read up to the maximum string length
        // (or as many bytes as we can until we reach the end of the vm region).
        let get_region_size = || -> Result<u64, TaskDumpError> {
            let region = self.get_vm_region(addr)?;

            let mut size_to_end = region.range.end - addr;

            // If the remaining is less than 4k, check if the next region is
            // contiguous, and extend the memory that could contain the string
            // to include it
            if size_to_end < 4 * 1024 {
                // The neighbouring region is purely an optional extension, so
                // failing to query it must not prevent us from reading what is
                // available in the region the string starts in
                if let Ok(maybe_adjacent) = self.get_vm_region(region.range.end) {
                    if maybe_adjacent.range.start == region.range.end {
                        size_to_end += maybe_adjacent.range.end - maybe_adjacent.range.start;
                    }
                }
            }

            Ok(size_to_end)
        };

        if let Ok(size_to_end) = get_region_size() {
            let mut bytes = self.read_task_memory::<u8>(
                addr,
                std::cmp::min(size_to_end as usize, expected_size.unwrap_or(8 * 1024)),
            )?;

            // Find the null terminator and truncate our string
            if let Some(null_pos) = bytes.iter().position(|c| *c == 0) {
                bytes.truncate(null_pos);
            }

            Ok(String::from_utf8(bytes).map(Some)?)
        } else {
            Ok(None)
        }
    }

    /// Retrieves information on the virtual memory region that
    /// `mach_vm_region_recurse` reports for the specified address.
    ///
    /// # Errors
    ///
    /// The syscall to retrieve the VM region information fails for some reason,
    /// eg. a bad address.
    pub fn get_vm_region(&self, addr: u64) -> Result<VMRegionInfo, TaskDumpError> {
        // <user/include/mach/vm_region.h>
        const VM_REGION_SUBMAP_INFO_COUNT_64: u32 =
            (std::mem::size_of::<mach::vm_region_submap_info_64>() / std::mem::size_of::<u32>())
                as u32;

        let mut raw_info = std::mem::MaybeUninit::<mach::vm_region_submap_info_64>::uninit();
        let mut info_words = VM_REGION_SUBMAP_INFO_COUNT_64;
        let mut depth = 0;

        // The kernel overwrites these with the base and size of the region
        let mut start = addr;
        let mut length = 0;

        mach_call!(mach::mach_vm_region_recurse(
            self.task,
            &mut start,
            &mut length,
            &mut depth,
            raw_info.as_mut_ptr().cast(),
            &mut info_words,
        ))?;

        // SAFETY: this will be valid if the syscall succeeded
        let info = unsafe { raw_info.assume_init() };
        let end = start + length;

        Ok(VMRegionInfo {
            info,
            range: start..end,
        })
    }

    /// Fetches the CPU context (ie register state) of the specified thread. The
    /// contents of the state are architecture specific.
    ///
    /// # Errors
    ///
    /// The specified thread id is invalid, or the thread is in a task that is
    /// compiled for a different architecture than this local task.
    pub fn read_thread_state(&self, tid: u32) -> Result<mach::ThreadState, TaskDumpError> {
        let mut state = mach::ThreadState::default();
        let flavor = mach::THREAD_STATE_FLAVOR as i32;

        mach_call!(mach::thread_get_state(
            tid,
            flavor,
            state.state.as_mut_ptr(),
            &mut state.state_size,
        ))?;

        Ok(state)
    }

    /// Queries the task for the information described by `T`, using the flavor
    /// that `T` declares.
    ///
    /// # Errors
    ///
    /// The syscall to receive the task information failed for some reason, eg.
    /// the specified type and the flavor are mismatched and considered invalid.
    pub fn task_info<T: mach::TaskInfo>(&self) -> Result<T, TaskDumpError> {
        // The count handed to the call is the size of `T` in 32-bit units
        let mut word_count = (std::mem::size_of::<T>() / std::mem::size_of::<u32>()) as u32;
        let mut raw = std::mem::MaybeUninit::<T>::uninit();

        mach_call!(mach::task::task_info(
            self.task,
            T::FLAVOR,
            raw.as_mut_ptr().cast(),
            &mut word_count
        ))?;

        // SAFETY: this will be initialized if the call succeeded
        let info = unsafe { raw.assume_init() };
        Ok(info)
    }

    /// Queries the specified thread for the information described by `T`, using
    /// the flavor that `T` declares.
    ///
    /// # Errors
    ///
    /// The syscall to receive the thread information failed for some reason, eg.
    /// the specified type and the flavor are mismatched and considered invalid,
    /// or the thread no longer exists
    pub fn thread_info<T: mach::ThreadInfo>(&self, tid: u32) -> Result<T, TaskDumpError> {
        // The count handed to the call is the size of `T` in 32-bit units
        let mut word_count = (std::mem::size_of::<T>() / std::mem::size_of::<u32>()) as u32;
        let mut raw = std::mem::MaybeUninit::<T>::uninit();

        mach_call!(mach::thread_info(
            tid,
            T::FLAVOR,
            raw.as_mut_ptr().cast(),
            &mut word_count,
        ))?;

        // SAFETY: this will be initialized if the call succeeded
        let info = unsafe { raw.assume_init() };
        Ok(info)
    }

    /// Retrieves all of the images loaded in the task.
    ///
    /// Returns the (truncated) `dyld_all_image_infos` header read from the task
    /// together with the array of image descriptions it points to.
    ///
    /// Note that there may be multiple images with the same load address.
    ///
    /// # Errors
    ///
    /// The syscall to retrieve the location of the loaded images fails, or
    /// the syscall to read the loaded images from the process memory fails
    pub fn read_images(&self) -> Result<(AllImagesInfo, Vec<ImageInfo>), TaskDumpError> {
        impl mach::TaskInfo for mach::task_info::task_dyld_info {
            const FLAVOR: u32 = mach::task_info::TASK_DYLD_INFO;
        }

        // Retrieve the address at which the list of loaded images is located
        // within the task
        let all_images_addr = {
            let dyld_info = self.task_info::<mach::task_info::task_dyld_info>()?;
            dyld_info.all_image_info_addr
        };

        // Here we make the assumption that dyld loaded at the same address in
        // the crashed process vs. this one.  This is an assumption made in
        // "dyld_debug.c" and is said to be nearly always valid.
        let dyld_all_info_buf =
            self.read_task_memory::<u8>(all_images_addr, std::mem::size_of::<AllImagesInfo>())?;

        // The byte buffer carries no alignment guarantee, so the struct is
        // copied out with an unaligned read rather than by forming a reference
        // into the buffer.
        //
        // SAFETY: the buffer holds `size_of::<AllImagesInfo>()` bytes, and the
        // read has no alignment requirement. The contents are fine as long as
        // the kernel isn't lying to us
        let all_images_info = unsafe {
            std::ptr::read_unaligned(dyld_all_info_buf.as_ptr().cast::<AllImagesInfo>())
        };

        let images = self.read_task_memory::<ImageInfo>(
            all_images_info.info_array_addr,
            all_images_info.info_array_count as usize,
        )?;

        Ok((all_images_info, images))
    }

    /// Finds the main executable image of the task, ie. the first loaded image
    /// whose header has the `MH_EXECUTE` file type.
    ///
    /// Note that this method is currently only used for tests due to deficiencies
    /// in `otool`
    ///
    /// # Errors
    ///
    /// Any of the errors that apply to [`Self::read_images`] apply here, in
    /// addition to not being able to find the main executable image. An image
    /// header that fails to read or has an unexpected magic also aborts the
    /// search with an error.
    pub fn read_executable_image(&self) -> Result<ImageInfo, TaskDumpError> {
        let (_, candidates) = self.read_images()?;

        for candidate in candidates {
            let headers = self.read_task_memory::<mach::MachHeader>(candidate.load_address, 1)?;
            let hdr = &headers[0];

            if hdr.magic != mach::MH_MAGIC_64 {
                return Err(TaskDumpError::InvalidMachHeader);
            } else if hdr.file_type == mach::MH_EXECUTE {
                return Ok(candidate);
            }
        }

        Err(TaskDumpError::NoExecutableImage)
    }

    /// Reads the raw load commands of the specified image out of the task
    ///
    /// # Errors
    ///
    /// We fail to read the image header for the specified image, the header we
    /// read is determined to be invalid, or we fail to read the block of memory
    /// containing the load commands themselves.
    pub fn read_load_commands(&self, img: &ImageInfo) -> Result<mach::LoadCommands, TaskDumpError> {
        let headers = self.read_task_memory::<mach::MachHeader>(img.load_address, 1)?;
        let hdr = &headers[0];

        if hdr.magic != mach::MH_MAGIC_64 {
            return Err(TaskDumpError::InvalidMachHeader);
        }

        // The load commands immediately follow the image header in the task
        // memory. They vary in size, so they are retrieved as a raw byte buffer
        // that can later be walked by stepping over each command's own size
        let commands_addr = img.load_address + std::mem::size_of::<mach::MachHeader>() as u64;
        let commands_len = hdr.size_commands as usize;
        let buffer = self.read_task_memory::<u8>(commands_addr, commands_len)?;

        Ok(mach::LoadCommands {
            buffer,
            count: hdr.num_commands,
        })
    }

    /// Gets a list of all of the thread ids in the task
    ///
    /// # Errors
    ///
    /// The syscall to retrieve the list of threads fails
    pub fn read_threads(&self) -> Result<&'static [u32], TaskDumpError> {
        let mut threads = std::ptr::null_mut();
        let mut thread_count = 0;

        mach_call!(mach::task_threads(
            self.task,
            &mut threads,
            &mut thread_count
        ))?;

        Ok(
            // SAFETY: This should be valid if the call succeeded
            unsafe { std::slice::from_raw_parts(threads, thread_count as usize) },
        )
    }

    /// Looks up the process id that corresponds to the task
    ///
    /// # Errors
    ///
    /// Presumably the only way this would fail would be if the task we are
    /// dumping disappears.
    pub fn pid_for_task(&self) -> Result<i32, TaskDumpError> {
        let mut pid = 0;
        let outcome = mach_call!(mach::pid_for_task(self.task, &mut pid));
        outcome.map(|()| pid)
    }
}