diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 16:49:04 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 16:49:04 +0000 |
commit | 16f504a9dca3fe3b70568f67b7d41241ae485288 (patch) | |
tree | c60f36ada0496ba928b7161059ba5ab1ab224f9d /src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel | |
parent | Initial commit. (diff) | |
download | virtualbox-upstream.tar.xz virtualbox-upstream.zip |
Adding upstream version 7.0.6-dfsg. (refs: upstream/7.0.6-dfsg, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel')
11 files changed, 4525 insertions, 0 deletions
diff --git a/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/Makefile.kup b/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/Makefile.kup new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/Makefile.kup diff --git a/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/Makefile b/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/Makefile new file mode 100644 index 00000000..5cd153e3 --- /dev/null +++ b/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/Makefile @@ -0,0 +1,76 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +UTSBASE = ../.. + +MODULE = dtrace +OBJECTS = $(DTRACE_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(DTRACE_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/common/dtrace + +include $(UTSBASE)/intel/Makefile.intel + +# +# For now, disable these lint checks; maintainers should endeavor +# to investigate and remove these for maximum lint coverage. +# Please do not carry these forward to new Makefiles. 
+# +LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN +LINTTAGS += -erroff=E_SUPPRESSION_DIRECTIVE_UNUSED +LINTTAGS += -erroff=E_STATIC_UNUSED +LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW +LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV + +ALL_TARGET = $(BINARY) $(SRC_CONFILE) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) +AS_INC_PATH += -I$(DSF_DIR)/$(OBJS_DIR) + +ASSYM_H = $(DSF_DIR)/$(OBJS_DIR)/assym.h + +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +$(BINARY): $(ASSYM_H) + +include $(UTSBASE)/intel/Makefile.targ diff --git a/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/Makefile.kup b/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/Makefile.kup new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/Makefile.kup diff --git a/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/dtrace_asm.s b/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/dtrace_asm.s new file mode 100644 index 00000000..47b981d1 --- /dev/null +++ b/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/dtrace_asm.s @@ -0,0 +1,457 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/asm_linkage.h> +#include <sys/regset.h> + +#if defined(lint) +#include <sys/dtrace_impl.h> +#else +#include "assym.h" +#endif + +#if defined(lint) || defined(__lint) + +greg_t +dtrace_getfp(void) +{ return (0); } + +#else /* lint */ + +#if defined(__amd64) + + ENTRY_NP(dtrace_getfp) + movq %rbp, %rax + ret + SET_SIZE(dtrace_getfp) + +#elif defined(__i386) + + ENTRY_NP(dtrace_getfp) + movl %ebp, %eax + ret + SET_SIZE(dtrace_getfp) + +#endif /* __i386 */ +#endif /* lint */ + + +#if defined(lint) || defined(__lint) + +uint32_t +dtrace_cas32(uint32_t *target, uint32_t cmp, uint32_t new) +{ + uint32_t old; + + if ((old = *target) == cmp) + *target = new; + return (old); +} + +void * +dtrace_casptr(void *target, void *cmp, void *new) +{ + void *old; + + if ((old = *(void **)target) == cmp) + *(void **)target = new; + return (old); +} + +#else /* lint */ + +#if defined(__amd64) + + ENTRY(dtrace_cas32) + movl %esi, %eax + lock + cmpxchgl %edx, (%rdi) + ret + SET_SIZE(dtrace_cas32) + + ENTRY(dtrace_casptr) + movq %rsi, %rax + lock + cmpxchgq %rdx, (%rdi) + ret + SET_SIZE(dtrace_casptr) + +#elif defined(__i386) + + ENTRY(dtrace_cas32) + ALTENTRY(dtrace_casptr) + movl 4(%esp), %edx + movl 8(%esp), %eax + movl 12(%esp), %ecx + lock + cmpxchgl %ecx, (%edx) + ret + SET_SIZE(dtrace_casptr) + SET_SIZE(dtrace_cas32) + +#endif /* __i386 */ +#endif /* lint */ + +#if defined(lint) + +/*ARGSUSED*/ +uintptr_t +dtrace_caller(int aframes) +{ + return (0); +} + +#else /* lint */ + +#if defined(__amd64) + ENTRY(dtrace_caller) + movq $-1, %rax + ret + SET_SIZE(dtrace_caller) + +#elif 
defined(__i386) + + ENTRY(dtrace_caller) + movl $-1, %eax + ret + SET_SIZE(dtrace_caller) + +#endif /* __i386 */ +#endif /* lint */ + +#if defined(lint) + +/*ARGSUSED*/ +void +dtrace_copy(uintptr_t src, uintptr_t dest, size_t size) +{} + +#else + +#if defined(__amd64) + + ENTRY(dtrace_copy) + pushq %rbp + movq %rsp, %rbp + + xchgq %rdi, %rsi /* make %rsi source, %rdi dest */ + movq %rdx, %rcx /* load count */ + repz /* repeat for count ... */ + smovb /* move from %ds:rsi to %es:rdi */ + leave + ret + SET_SIZE(dtrace_copy) + +#elif defined(__i386) + + ENTRY(dtrace_copy) + pushl %ebp + movl %esp, %ebp + pushl %esi + pushl %edi + + movl 8(%ebp), %esi / Load source address + movl 12(%ebp), %edi / Load destination address + movl 16(%ebp), %ecx / Load count + repz / Repeat for count... + smovb / move from %ds:si to %es:di + + popl %edi + popl %esi + movl %ebp, %esp + popl %ebp + ret + SET_SIZE(dtrace_copy) + +#endif /* __i386 */ +#endif + +#if defined(lint) + +/*ARGSUSED*/ +void +dtrace_copystr(uintptr_t uaddr, uintptr_t kaddr, size_t size, + volatile uint16_t *flags) +{} + +#else + +#if defined(__amd64) + + ENTRY(dtrace_copystr) + pushq %rbp + movq %rsp, %rbp + +0: + movb (%rdi), %al /* load from source */ + movb %al, (%rsi) /* store to destination */ + addq $1, %rdi /* increment source pointer */ + addq $1, %rsi /* increment destination pointer */ + subq $1, %rdx /* decrement remaining count */ + cmpb $0, %al + je 2f + testq $0xfff, %rdx /* test if count is 4k-aligned */ + jnz 1f /* if not, continue with copying */ + testq $CPU_DTRACE_BADADDR, (%rcx) /* load and test dtrace flags */ + jnz 2f +1: + cmpq $0, %rdx + jne 0b +2: + leave + ret + + SET_SIZE(dtrace_copystr) + +#elif defined(__i386) + + ENTRY(dtrace_copystr) + + pushl %ebp / Setup stack frame + movl %esp, %ebp + pushl %ebx / Save registers + + movl 8(%ebp), %ebx / Load source address + movl 12(%ebp), %edx / Load destination address + movl 16(%ebp), %ecx / Load count + +0: + movb (%ebx), %al / Load from source + 
movb %al, (%edx) / Store to destination + incl %ebx / Increment source pointer + incl %edx / Increment destination pointer + decl %ecx / Decrement remaining count + cmpb $0, %al + je 2f + testl $0xfff, %ecx / Check if count is 4k-aligned + jnz 1f + movl 20(%ebp), %eax / load flags pointer + testl $CPU_DTRACE_BADADDR, (%eax) / load and test dtrace flags + jnz 2f +1: + cmpl $0, %ecx + jne 0b + +2: + popl %ebx + movl %ebp, %esp + popl %ebp + ret + + SET_SIZE(dtrace_copystr) + +#endif /* __i386 */ +#endif + +#if defined(lint) + +/*ARGSUSED*/ +uintptr_t +dtrace_fulword(void *addr) +{ return (0); } + +#else +#if defined(__amd64) + + ENTRY(dtrace_fulword) + movq (%rdi), %rax + ret + SET_SIZE(dtrace_fulword) + +#elif defined(__i386) + + ENTRY(dtrace_fulword) + movl 4(%esp), %ecx + xorl %eax, %eax + movl (%ecx), %eax + ret + SET_SIZE(dtrace_fulword) + +#endif /* __i386 */ +#endif + +#if defined(lint) + +/*ARGSUSED*/ +uint8_t +dtrace_fuword8_nocheck(void *addr) +{ return (0); } + +#else +#if defined(__amd64) + + ENTRY(dtrace_fuword8_nocheck) + xorq %rax, %rax + movb (%rdi), %al + ret + SET_SIZE(dtrace_fuword8_nocheck) + +#elif defined(__i386) + + ENTRY(dtrace_fuword8_nocheck) + movl 4(%esp), %ecx + xorl %eax, %eax + movzbl (%ecx), %eax + ret + SET_SIZE(dtrace_fuword8_nocheck) + +#endif /* __i386 */ +#endif + +#if defined(lint) + +/*ARGSUSED*/ +uint16_t +dtrace_fuword16_nocheck(void *addr) +{ return (0); } + +#else +#if defined(__amd64) + + ENTRY(dtrace_fuword16_nocheck) + xorq %rax, %rax + movw (%rdi), %ax + ret + SET_SIZE(dtrace_fuword16_nocheck) + +#elif defined(__i386) + + ENTRY(dtrace_fuword16_nocheck) + movl 4(%esp), %ecx + xorl %eax, %eax + movzwl (%ecx), %eax + ret + SET_SIZE(dtrace_fuword16_nocheck) + +#endif /* __i386 */ +#endif + +#if defined(lint) + +/*ARGSUSED*/ +uint32_t +dtrace_fuword32_nocheck(void *addr) +{ return (0); } + +#else +#if defined(__amd64) + + ENTRY(dtrace_fuword32_nocheck) + xorq %rax, %rax + movl (%rdi), %eax + ret + 
SET_SIZE(dtrace_fuword32_nocheck) + +#elif defined(__i386) + + ENTRY(dtrace_fuword32_nocheck) + movl 4(%esp), %ecx + xorl %eax, %eax + movl (%ecx), %eax + ret + SET_SIZE(dtrace_fuword32_nocheck) + +#endif /* __i386 */ +#endif + +#if defined(lint) + +/*ARGSUSED*/ +uint64_t +dtrace_fuword64_nocheck(void *addr) +{ return (0); } + +#else +#if defined(__amd64) + + ENTRY(dtrace_fuword64_nocheck) + movq (%rdi), %rax + ret + SET_SIZE(dtrace_fuword64_nocheck) + +#elif defined(__i386) + + ENTRY(dtrace_fuword64_nocheck) + movl 4(%esp), %ecx + xorl %eax, %eax + xorl %edx, %edx + movl (%ecx), %eax + movl 4(%ecx), %edx + ret + SET_SIZE(dtrace_fuword64_nocheck) + +#endif /* __i386 */ +#endif + +#if defined(lint) || defined(__lint) + +/*ARGSUSED*/ +void +dtrace_probe_error(dtrace_state_t *state, dtrace_epid_t epid, int which, + int fault, int fltoffs, uintptr_t illval) +{} + +#else /* lint */ +#if defined(__amd64) + + ENTRY(dtrace_probe_error) + pushq %rbp + movq %rsp, %rbp + subq $0x8, %rsp + movq %r9, (%rsp) + movq %r8, %r9 + movq %rcx, %r8 + movq %rdx, %rcx + movq %rsi, %rdx + movq %rdi, %rsi + movl dtrace_probeid_error(%rip), %edi + call dtrace_probe + addq $0x8, %rsp + leave + ret + SET_SIZE(dtrace_probe_error) + +#elif defined(__i386) + + ENTRY(dtrace_probe_error) + pushl %ebp + movl %esp, %ebp + pushl 0x1c(%ebp) + pushl 0x18(%ebp) + pushl 0x14(%ebp) + pushl 0x10(%ebp) + pushl 0xc(%ebp) + pushl 0x8(%ebp) + pushl dtrace_probeid_error + call dtrace_probe + movl %ebp, %esp + popl %ebp + ret + SET_SIZE(dtrace_probe_error) + +#endif /* __i386 */ +#endif diff --git a/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/dtrace_isa.c b/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/dtrace_isa.c new file mode 100644 index 00000000..6b5a4e1b --- /dev/null +++ b/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/dtrace_isa.c @@ -0,0 +1,746 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License 
(the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/dtrace_impl.h> +#include <sys/stack.h> +#include <sys/frame.h> +#include <sys/cmn_err.h> +#include <sys/privregs.h> +#include <sys/sysmacros.h> + +extern uintptr_t kernelbase; + +int dtrace_ustackdepth_max = 2048; + +void +dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, + uint32_t *intrpc) +{ + struct frame *fp = (struct frame *)dtrace_getfp(); + struct frame *nextfp, *minfp, *stacktop; + int depth = 0; + int on_intr, last = 0; + uintptr_t pc; + uintptr_t caller = CPU->cpu_dtrace_caller; + + if ((on_intr = CPU_ON_INTR(CPU)) != 0) + stacktop = (struct frame *)(CPU->cpu_intr_stack + SA(MINFRAME)); + else + stacktop = (struct frame *)curthread->t_stk; + minfp = fp; + + aframes++; + + if (intrpc != NULL && depth < pcstack_limit) + pcstack[depth++] = (pc_t)intrpc; + + while (depth < pcstack_limit) { + nextfp = (struct frame *)fp->fr_savfp; + pc = fp->fr_savpc; + + if (nextfp <= minfp || nextfp >= stacktop) { + if (on_intr) { + /* + * Hop from interrupt stack to thread stack. 
+ */ + stacktop = (struct frame *)curthread->t_stk; + minfp = (struct frame *)curthread->t_stkbase; + on_intr = 0; + continue; + } + + /* + * This is the last frame we can process; indicate + * that we should return after processing this frame. + */ + last = 1; + } + + if (aframes > 0) { + if (--aframes == 0 && caller != NULL) { + /* + * We've just run out of artificial frames, + * and we have a valid caller -- fill it in + * now. + */ + ASSERT(depth < pcstack_limit); + pcstack[depth++] = (pc_t)caller; + caller = NULL; + } + } else { + if (depth < pcstack_limit) + pcstack[depth++] = (pc_t)pc; + } + + if (last) { + while (depth < pcstack_limit) + pcstack[depth++] = NULL; + return; + } + + fp = nextfp; + minfp = fp; + } +} + +static int +dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc, + uintptr_t sp) +{ + klwp_t *lwp = ttolwp(curthread); + proc_t *p = curproc; + uintptr_t oldcontext = lwp->lwp_oldcontext; + uintptr_t oldsp; + volatile uint16_t *flags = + (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags; + size_t s1, s2; + int ret = 0; + + ASSERT(pcstack == NULL || pcstack_limit > 0); + ASSERT(dtrace_ustackdepth_max > 0); + + if (p->p_model == DATAMODEL_NATIVE) { + s1 = sizeof (struct frame) + 2 * sizeof (long); + s2 = s1 + sizeof (siginfo_t); + } else { + s1 = sizeof (struct frame32) + 3 * sizeof (int); + s2 = s1 + sizeof (siginfo32_t); + } + + while (pc != 0) { + /* + * We limit the number of times we can go around this + * loop to account for a circular stack. 
+ */ + if (ret++ >= dtrace_ustackdepth_max) { + *flags |= CPU_DTRACE_BADSTACK; + cpu_core[CPU->cpu_id].cpuc_dtrace_illval = sp; + break; + } + + if (pcstack != NULL) { + *pcstack++ = (uint64_t)pc; + pcstack_limit--; + if (pcstack_limit <= 0) + break; + } + + if (sp == 0) + break; + + oldsp = sp; + + if (oldcontext == sp + s1 || oldcontext == sp + s2) { + if (p->p_model == DATAMODEL_NATIVE) { + ucontext_t *ucp = (ucontext_t *)oldcontext; + greg_t *gregs = ucp->uc_mcontext.gregs; + + sp = dtrace_fulword(&gregs[REG_FP]); + pc = dtrace_fulword(&gregs[REG_PC]); + + oldcontext = dtrace_fulword(&ucp->uc_link); + } else { + ucontext32_t *ucp = (ucontext32_t *)oldcontext; + greg32_t *gregs = ucp->uc_mcontext.gregs; + + sp = dtrace_fuword32(&gregs[EBP]); + pc = dtrace_fuword32(&gregs[EIP]); + + oldcontext = dtrace_fuword32(&ucp->uc_link); + } + } else { + if (p->p_model == DATAMODEL_NATIVE) { + struct frame *fr = (struct frame *)sp; + + pc = dtrace_fulword(&fr->fr_savpc); + sp = dtrace_fulword(&fr->fr_savfp); + } else { + struct frame32 *fr = (struct frame32 *)sp; + + pc = dtrace_fuword32(&fr->fr_savpc); + sp = dtrace_fuword32(&fr->fr_savfp); + } + } + + if (sp == oldsp) { + *flags |= CPU_DTRACE_BADSTACK; + cpu_core[CPU->cpu_id].cpuc_dtrace_illval = sp; + break; + } + + /* + * This is totally bogus: if we faulted, we're going to clear + * the fault and break. This is to deal with the apparently + * broken Java stacks on x86. + */ + if (*flags & CPU_DTRACE_FAULT) { + *flags &= ~CPU_DTRACE_FAULT; + break; + } + } + + return (ret); +} + +void +dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) +{ + klwp_t *lwp = ttolwp(curthread); + proc_t *p = curproc; + struct regs *rp; + uintptr_t pc, sp; + int n; + + ASSERT(DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOFAULT)); + + if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT)) + return; + + if (pcstack_limit <= 0) + return; + + /* + * If there's no user context we still need to zero the stack. 
+ */ + if (lwp == NULL || p == NULL || (rp = lwp->lwp_regs) == NULL) + goto zero; + + *pcstack++ = (uint64_t)p->p_pid; + pcstack_limit--; + + if (pcstack_limit <= 0) + return; + + pc = rp->r_pc; + sp = rp->r_fp; + + if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { + *pcstack++ = (uint64_t)pc; + pcstack_limit--; + if (pcstack_limit <= 0) + return; + + if (p->p_model == DATAMODEL_NATIVE) + pc = dtrace_fulword((void *)rp->r_sp); + else + pc = dtrace_fuword32((void *)rp->r_sp); + } + + n = dtrace_getustack_common(pcstack, pcstack_limit, pc, sp); + ASSERT(n >= 0); + ASSERT(n <= pcstack_limit); + + pcstack += n; + pcstack_limit -= n; + +zero: + while (pcstack_limit-- > 0) + *pcstack++ = NULL; +} + +int +dtrace_getustackdepth(void) +{ + klwp_t *lwp = ttolwp(curthread); + proc_t *p = curproc; + struct regs *rp; + uintptr_t pc, sp; + int n = 0; + + if (lwp == NULL || p == NULL || (rp = lwp->lwp_regs) == NULL) + return (0); + + if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT)) + return (-1); + + pc = rp->r_pc; + sp = rp->r_fp; + + if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { + n++; + + if (p->p_model == DATAMODEL_NATIVE) + pc = dtrace_fulword((void *)rp->r_sp); + else + pc = dtrace_fuword32((void *)rp->r_sp); + } + + n += dtrace_getustack_common(NULL, 0, pc, sp); + + return (n); +} + +void +dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) +{ + klwp_t *lwp = ttolwp(curthread); + proc_t *p = curproc; + struct regs *rp; + uintptr_t pc, sp, oldcontext; + volatile uint16_t *flags = + (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags; + size_t s1, s2; + + if (*flags & CPU_DTRACE_FAULT) + return; + + if (pcstack_limit <= 0) + return; + + /* + * If there's no user context we still need to zero the stack. 
+ */ + if (lwp == NULL || p == NULL || (rp = lwp->lwp_regs) == NULL) + goto zero; + + *pcstack++ = (uint64_t)p->p_pid; + pcstack_limit--; + + if (pcstack_limit <= 0) + return; + + pc = rp->r_pc; + sp = rp->r_fp; + oldcontext = lwp->lwp_oldcontext; + + if (p->p_model == DATAMODEL_NATIVE) { + s1 = sizeof (struct frame) + 2 * sizeof (long); + s2 = s1 + sizeof (siginfo_t); + } else { + s1 = sizeof (struct frame32) + 3 * sizeof (int); + s2 = s1 + sizeof (siginfo32_t); + } + + if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { + *pcstack++ = (uint64_t)pc; + *fpstack++ = 0; + pcstack_limit--; + if (pcstack_limit <= 0) + return; + + if (p->p_model == DATAMODEL_NATIVE) + pc = dtrace_fulword((void *)rp->r_sp); + else + pc = dtrace_fuword32((void *)rp->r_sp); + } + + while (pc != 0) { + *pcstack++ = (uint64_t)pc; + *fpstack++ = sp; + pcstack_limit--; + if (pcstack_limit <= 0) + break; + + if (sp == 0) + break; + + if (oldcontext == sp + s1 || oldcontext == sp + s2) { + if (p->p_model == DATAMODEL_NATIVE) { + ucontext_t *ucp = (ucontext_t *)oldcontext; + greg_t *gregs = ucp->uc_mcontext.gregs; + + sp = dtrace_fulword(&gregs[REG_FP]); + pc = dtrace_fulword(&gregs[REG_PC]); + + oldcontext = dtrace_fulword(&ucp->uc_link); + } else { + ucontext_t *ucp = (ucontext_t *)oldcontext; + greg_t *gregs = ucp->uc_mcontext.gregs; + + sp = dtrace_fuword32(&gregs[EBP]); + pc = dtrace_fuword32(&gregs[EIP]); + + oldcontext = dtrace_fuword32(&ucp->uc_link); + } + } else { + if (p->p_model == DATAMODEL_NATIVE) { + struct frame *fr = (struct frame *)sp; + + pc = dtrace_fulword(&fr->fr_savpc); + sp = dtrace_fulword(&fr->fr_savfp); + } else { + struct frame32 *fr = (struct frame32 *)sp; + + pc = dtrace_fuword32(&fr->fr_savpc); + sp = dtrace_fuword32(&fr->fr_savfp); + } + } + + /* + * This is totally bogus: if we faulted, we're going to clear + * the fault and break. This is to deal with the apparently + * broken Java stacks on x86. 
+ */ + if (*flags & CPU_DTRACE_FAULT) { + *flags &= ~CPU_DTRACE_FAULT; + break; + } + } + +zero: + while (pcstack_limit-- > 0) + *pcstack++ = NULL; +} + +/*ARGSUSED*/ +uint64_t +dtrace_getarg(int arg, int aframes) +{ + uintptr_t val; + struct frame *fp = (struct frame *)dtrace_getfp(); + uintptr_t *stack; + int i; +#if defined(__amd64) + /* + * A total of 6 arguments are passed via registers; any argument with + * index of 5 or lower is therefore in a register. + */ + int inreg = 5; +#endif + + for (i = 1; i <= aframes; i++) { + fp = (struct frame *)(fp->fr_savfp); + + if (fp->fr_savpc == (pc_t)dtrace_invop_callsite) { +#if !defined(__amd64) + /* + * If we pass through the invalid op handler, we will + * use the pointer that it passed to the stack as the + * second argument to dtrace_invop() as the pointer to + * the stack. When using this stack, we must step + * beyond the EIP/RIP that was pushed when the trap was + * taken -- hence the "+ 1" below. + */ + stack = ((uintptr_t **)&fp[1])[1] + 1; +#else + /* + * In the case of amd64, we will use the pointer to the + * regs structure that was pushed when we took the + * trap. To get this structure, we must increment + * beyond the frame structure, and then again beyond + * the calling RIP stored in dtrace_invop(). If the + * argument that we're seeking is passed on the stack, + * we'll pull the true stack pointer out of the saved + * registers and decrement our argument by the number + * of arguments passed in registers; if the argument + * we're seeking is passed in registers, we can just + * load it directly. + */ + struct regs *rp = (struct regs *)((uintptr_t)&fp[1] + + sizeof (uintptr_t)); + + if (arg <= inreg) { + stack = (uintptr_t *)&rp->r_rdi; + } else { + stack = (uintptr_t *)(rp->r_rsp); + arg -= inreg; + } +#endif + goto load; + } + + } + + /* + * We know that we did not come through a trap to get into + * dtrace_probe() -- the provider simply called dtrace_probe() + * directly. 
As this is the case, we need to shift the argument + * that we're looking for: the probe ID is the first argument to + * dtrace_probe(), so the argument n will actually be found where + * one would expect to find argument (n + 1). + */ + arg++; + +#if defined(__amd64) + if (arg <= inreg) { + /* + * This shouldn't happen. If the argument is passed in a + * register then it should have been, well, passed in a + * register... + */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return (0); + } + + arg -= (inreg + 1); +#endif + stack = (uintptr_t *)&fp[1]; + +load: + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + val = stack[arg]; + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + + return (val); +} + +/*ARGSUSED*/ +int +dtrace_getstackdepth(int aframes) +{ + struct frame *fp = (struct frame *)dtrace_getfp(); + struct frame *nextfp, *minfp, *stacktop; + int depth = 0; + int on_intr; + + if ((on_intr = CPU_ON_INTR(CPU)) != 0) + stacktop = (struct frame *)(CPU->cpu_intr_stack + SA(MINFRAME)); + else + stacktop = (struct frame *)curthread->t_stk; + minfp = fp; + + aframes++; + + for (;;) { + depth++; + + nextfp = (struct frame *)fp->fr_savfp; + + if (nextfp <= minfp || nextfp >= stacktop) { + if (on_intr) { + /* + * Hop from interrupt stack to thread stack. 
+ */ + stacktop = (struct frame *)curthread->t_stk; + minfp = (struct frame *)curthread->t_stkbase; + on_intr = 0; + continue; + } + break; + } + + fp = nextfp; + minfp = fp; + } + + if (depth <= aframes) + return (0); + + return (depth - aframes); +} + +ulong_t +dtrace_getreg(struct regs *rp, uint_t reg) +{ +#if defined(__amd64) + int regmap[] = { + REG_GS, /* GS */ + REG_FS, /* FS */ + REG_ES, /* ES */ + REG_DS, /* DS */ + REG_RDI, /* EDI */ + REG_RSI, /* ESI */ + REG_RBP, /* EBP */ + REG_RSP, /* ESP */ + REG_RBX, /* EBX */ + REG_RDX, /* EDX */ + REG_RCX, /* ECX */ + REG_RAX, /* EAX */ + REG_TRAPNO, /* TRAPNO */ + REG_ERR, /* ERR */ + REG_RIP, /* EIP */ + REG_CS, /* CS */ + REG_RFL, /* EFL */ + REG_RSP, /* UESP */ + REG_SS /* SS */ + }; + + if (reg <= SS) { + if (reg >= sizeof (regmap) / sizeof (int)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return (0); + } + + reg = regmap[reg]; + } else { + reg -= SS + 1; + } + + switch (reg) { + case REG_RDI: + return (rp->r_rdi); + case REG_RSI: + return (rp->r_rsi); + case REG_RDX: + return (rp->r_rdx); + case REG_RCX: + return (rp->r_rcx); + case REG_R8: + return (rp->r_r8); + case REG_R9: + return (rp->r_r9); + case REG_RAX: + return (rp->r_rax); + case REG_RBX: + return (rp->r_rbx); + case REG_RBP: + return (rp->r_rbp); + case REG_R10: + return (rp->r_r10); + case REG_R11: + return (rp->r_r11); + case REG_R12: + return (rp->r_r12); + case REG_R13: + return (rp->r_r13); + case REG_R14: + return (rp->r_r14); + case REG_R15: + return (rp->r_r15); + case REG_DS: + return (rp->r_ds); + case REG_ES: + return (rp->r_es); + case REG_FS: + return (rp->r_fs); + case REG_GS: + return (rp->r_gs); + case REG_TRAPNO: + return (rp->r_trapno); + case REG_ERR: + return (rp->r_err); + case REG_RIP: + return (rp->r_rip); + case REG_CS: + return (rp->r_cs); + case REG_SS: + return (rp->r_ss); + case REG_RFL: + return (rp->r_rfl); + case REG_RSP: + return (rp->r_rsp); + default: + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return (0); + } + 
+#else + if (reg > SS) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return (0); + } + + return ((&rp->r_gs)[reg]); +#endif +} + +static int +dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size) +{ + ASSERT(kaddr >= kernelbase && kaddr + size >= kaddr); + + if (uaddr + size >= kernelbase || uaddr + size < uaddr) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); + cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr; + return (0); + } + + return (1); +} + +/*ARGSUSED*/ +void +dtrace_copyin(uintptr_t uaddr, uintptr_t kaddr, size_t size, + volatile uint16_t *flags) +{ + if (dtrace_copycheck(uaddr, kaddr, size)) + dtrace_copy(uaddr, kaddr, size); +} + +/*ARGSUSED*/ +void +dtrace_copyout(uintptr_t kaddr, uintptr_t uaddr, size_t size, + volatile uint16_t *flags) +{ + if (dtrace_copycheck(uaddr, kaddr, size)) + dtrace_copy(kaddr, uaddr, size); +} + +void +dtrace_copyinstr(uintptr_t uaddr, uintptr_t kaddr, size_t size, + volatile uint16_t *flags) +{ + if (dtrace_copycheck(uaddr, kaddr, size)) + dtrace_copystr(uaddr, kaddr, size, flags); +} + +void +dtrace_copyoutstr(uintptr_t kaddr, uintptr_t uaddr, size_t size, + volatile uint16_t *flags) +{ + if (dtrace_copycheck(uaddr, kaddr, size)) + dtrace_copystr(kaddr, uaddr, size, flags); +} + +uint8_t +dtrace_fuword8(void *uaddr) +{ + extern uint8_t dtrace_fuword8_nocheck(void *); + if ((uintptr_t)uaddr >= _userlimit) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); + cpu_core[CPU->cpu_id].cpuc_dtrace_illval = (uintptr_t)uaddr; + return (0); + } + return (dtrace_fuword8_nocheck(uaddr)); +} + +uint16_t +dtrace_fuword16(void *uaddr) +{ + extern uint16_t dtrace_fuword16_nocheck(void *); + if ((uintptr_t)uaddr >= _userlimit) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); + cpu_core[CPU->cpu_id].cpuc_dtrace_illval = (uintptr_t)uaddr; + return (0); + } + return (dtrace_fuword16_nocheck(uaddr)); +} + +uint32_t +dtrace_fuword32(void *uaddr) +{ + extern uint32_t dtrace_fuword32_nocheck(void *); + if ((uintptr_t)uaddr >= _userlimit) { + 
DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); + cpu_core[CPU->cpu_id].cpuc_dtrace_illval = (uintptr_t)uaddr; + return (0); + } + return (dtrace_fuword32_nocheck(uaddr)); +} + +uint64_t +dtrace_fuword64(void *uaddr) +{ + extern uint64_t dtrace_fuword64_nocheck(void *); + if ((uintptr_t)uaddr >= _userlimit) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); + cpu_core[CPU->cpu_id].cpuc_dtrace_illval = (uintptr_t)uaddr; + return (0); + } + return (dtrace_fuword64_nocheck(uaddr)); +} diff --git a/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/fasttrap.conf b/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/fasttrap.conf new file mode 100644 index 00000000..a25f883f --- /dev/null +++ b/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/fasttrap.conf @@ -0,0 +1,39 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +name="fasttrap" parent="pseudo" instance=0; + +# fasttrap-max-probes is the limit on the number of tracepoints created +# by DTrace's pid provider. 
This value should be increased if DTrace is +# unable to create the desired probes because the limit has been hit. +#fasttrap-max-probes=250000; + +# fasttrap-hash-size determines the size of the hash table used to store +# enabled DTrace pid provider tracepoints. If there are many enabled +# tracepoints or many tracepoints hash to the same value, increasing this +# variable can improve the performance of executing a traced instruction. +#fasttrap-hash-size=16384; diff --git a/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/fasttrap_isa.c b/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/fasttrap_isa.c new file mode 100644 index 00000000..c4b58453 --- /dev/null +++ b/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/fasttrap_isa.c @@ -0,0 +1,1745 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/fasttrap_isa.h> +#include <sys/fasttrap_impl.h> +#include <sys/dtrace.h> +#include <sys/dtrace_impl.h> +#include <sys/cmn_err.h> +#include <sys/regset.h> +#include <sys/privregs.h> +#include <sys/segments.h> +#include <sys/x86_archext.h> +#include <sys/sysmacros.h> +#include <sys/trap.h> +#include <sys/archsystm.h> + +/* + * Lossless User-Land Tracing on x86 + * --------------------------------- + * + * The execution of most instructions is not dependent on the address; for + * these instructions it is sufficient to copy them into the user process's + * address space and execute them. To effectively single-step an instruction + * in user-land, we copy out the following sequence of instructions to scratch + * space in the user thread's ulwp_t structure. + * + * We then set the program counter (%eip or %rip) to point to this scratch + * space. Once execution resumes, the original instruction is executed and + * then control flow is redirected to what was originally the subsequent + * instruction. If the kernel attempts to deliver a signal while single- + * stepping, the signal is deferred and the program counter is moved into the + * second sequence of instructions. The second sequence ends in a trap into + * the kernel where the deferred signal is then properly handled and delivered. + * + * For instructions whose execution is position dependent, we perform simple + * emulation. These instructions are limited to control transfer + * instructions in 32-bit mode, but in 64-bit mode there's the added wrinkle + * of %rip-relative addressing that means that almost any instruction can be + * position dependent. For all the details on how we emulate generic + * instructions including %rip-relative instructions, see the code in + * fasttrap_pid_probe() below where we handle instructions of type + * FASTTRAP_T_COMMON (under the header: Generic Instruction Tracing). 
+ */ + +#define FASTTRAP_MODRM_MOD(modrm) (((modrm) >> 6) & 0x3) +#define FASTTRAP_MODRM_REG(modrm) (((modrm) >> 3) & 0x7) +#define FASTTRAP_MODRM_RM(modrm) ((modrm) & 0x7) +#define FASTTRAP_MODRM(mod, reg, rm) (((mod) << 6) | ((reg) << 3) | (rm)) + +#define FASTTRAP_SIB_SCALE(sib) (((sib) >> 6) & 0x3) +#define FASTTRAP_SIB_INDEX(sib) (((sib) >> 3) & 0x7) +#define FASTTRAP_SIB_BASE(sib) ((sib) & 0x7) + +#define FASTTRAP_REX_W(rex) (((rex) >> 3) & 1) +#define FASTTRAP_REX_R(rex) (((rex) >> 2) & 1) +#define FASTTRAP_REX_X(rex) (((rex) >> 1) & 1) +#define FASTTRAP_REX_B(rex) ((rex) & 1) +#define FASTTRAP_REX(w, r, x, b) \ + (0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b)) + +/* + * Single-byte op-codes. + */ +#define FASTTRAP_PUSHL_EBP 0x55 + +#define FASTTRAP_JO 0x70 +#define FASTTRAP_JNO 0x71 +#define FASTTRAP_JB 0x72 +#define FASTTRAP_JAE 0x73 +#define FASTTRAP_JE 0x74 +#define FASTTRAP_JNE 0x75 +#define FASTTRAP_JBE 0x76 +#define FASTTRAP_JA 0x77 +#define FASTTRAP_JS 0x78 +#define FASTTRAP_JNS 0x79 +#define FASTTRAP_JP 0x7a +#define FASTTRAP_JNP 0x7b +#define FASTTRAP_JL 0x7c +#define FASTTRAP_JGE 0x7d +#define FASTTRAP_JLE 0x7e +#define FASTTRAP_JG 0x7f + +#define FASTTRAP_NOP 0x90 + +#define FASTTRAP_MOV_EAX 0xb8 +#define FASTTRAP_MOV_ECX 0xb9 + +#define FASTTRAP_RET16 0xc2 +#define FASTTRAP_RET 0xc3 + +#define FASTTRAP_LOOPNZ 0xe0 +#define FASTTRAP_LOOPZ 0xe1 +#define FASTTRAP_LOOP 0xe2 +#define FASTTRAP_JCXZ 0xe3 + +#define FASTTRAP_CALL 0xe8 +#define FASTTRAP_JMP32 0xe9 +#define FASTTRAP_JMP8 0xeb + +#define FASTTRAP_INT3 0xcc +#define FASTTRAP_INT 0xcd + +#define FASTTRAP_2_BYTE_OP 0x0f +#define FASTTRAP_GROUP5_OP 0xff + +/* + * Two-byte op-codes (second byte only). 
+ */ +#define FASTTRAP_0F_JO 0x80 +#define FASTTRAP_0F_JNO 0x81 +#define FASTTRAP_0F_JB 0x82 +#define FASTTRAP_0F_JAE 0x83 +#define FASTTRAP_0F_JE 0x84 +#define FASTTRAP_0F_JNE 0x85 +#define FASTTRAP_0F_JBE 0x86 +#define FASTTRAP_0F_JA 0x87 +#define FASTTRAP_0F_JS 0x88 +#define FASTTRAP_0F_JNS 0x89 +#define FASTTRAP_0F_JP 0x8a +#define FASTTRAP_0F_JNP 0x8b +#define FASTTRAP_0F_JL 0x8c +#define FASTTRAP_0F_JGE 0x8d +#define FASTTRAP_0F_JLE 0x8e +#define FASTTRAP_0F_JG 0x8f + +#define FASTTRAP_EFLAGS_OF 0x800 +#define FASTTRAP_EFLAGS_DF 0x400 +#define FASTTRAP_EFLAGS_SF 0x080 +#define FASTTRAP_EFLAGS_ZF 0x040 +#define FASTTRAP_EFLAGS_AF 0x010 +#define FASTTRAP_EFLAGS_PF 0x004 +#define FASTTRAP_EFLAGS_CF 0x001 + +/* + * Instruction prefixes. + */ +#define FASTTRAP_PREFIX_OPERAND 0x66 +#define FASTTRAP_PREFIX_ADDRESS 0x67 +#define FASTTRAP_PREFIX_CS 0x2E +#define FASTTRAP_PREFIX_DS 0x3E +#define FASTTRAP_PREFIX_ES 0x26 +#define FASTTRAP_PREFIX_FS 0x64 +#define FASTTRAP_PREFIX_GS 0x65 +#define FASTTRAP_PREFIX_SS 0x36 +#define FASTTRAP_PREFIX_LOCK 0xF0 +#define FASTTRAP_PREFIX_REP 0xF3 +#define FASTTRAP_PREFIX_REPNE 0xF2 + +#define FASTTRAP_NOREG 0xff + +/* + * Map between instruction register encodings and the kernel constants which + * correspond to indices into struct regs. + */ +#ifdef __amd64 +static const uint8_t regmap[16] = { + REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI, + REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15, +}; +#else +static const uint8_t regmap[8] = { + EAX, ECX, EDX, EBX, UESP, EBP, ESI, EDI +}; +#endif + +static ulong_t fasttrap_getreg(struct regs *, uint_t); + +static uint64_t +fasttrap_anarg(struct regs *rp, int function_entry, int argno) +{ + uint64_t value; + int shift = function_entry ? 1 : 0; + +#ifdef __amd64 + if (curproc->p_model == DATAMODEL_LP64) { + uintptr_t *stack; + + /* + * In 64-bit mode, the first six arguments are stored in + * registers. 
+ */ + if (argno < 6) + return ((&rp->r_rdi)[argno]); + + stack = (uintptr_t *)rp->r_sp; + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + value = dtrace_fulword(&stack[argno - 6 + shift]); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); + } else { +#endif + uint32_t *stack = (uint32_t *)rp->r_sp; + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + value = dtrace_fuword32(&stack[argno + shift]); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); +#ifdef __amd64 + } +#endif + + return (value); +} + +/*ARGSUSED*/ +int +fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc, + fasttrap_probe_type_t type) +{ + uint8_t instr[FASTTRAP_MAX_INSTR_SIZE + 10]; + size_t len = FASTTRAP_MAX_INSTR_SIZE; + size_t first = MIN(len, PAGESIZE - (pc & PAGEOFFSET)); + uint_t start = 0; + int rmindex, size; + uint8_t seg, rex = 0; + + /* + * Read the instruction at the given address out of the process's + * address space. We don't have to worry about a debugger + * changing this instruction before we overwrite it with our trap + * instruction since P_PR_LOCK is set. Since instructions can span + * pages, we potentially read the instruction in two parts. If the + * second part fails, we just zero out that part of the instruction. + */ + if (uread(p, &instr[0], first, pc) != 0) + return (-1); + if (len > first && + uread(p, &instr[first], len - first, pc + first) != 0) { + bzero(&instr[first], len - first); + len = first; + } + + /* + * If the disassembly fails, then we have a malformed instruction. + */ + if ((size = dtrace_instr_size_isa(instr, p->p_model, &rmindex)) <= 0) + return (-1); + + /* + * Make sure the disassembler isn't completely broken. + */ + ASSERT(-1 <= rmindex && rmindex < size); + + /* + * If the computed size is greater than the number of bytes read, + * then it was a malformed instruction possibly because it fell on a + * page boundary and the subsequent page was missing or because of + * some malicious user. 
+ */ + if (size > len) + return (-1); + + tp->ftt_size = (uint8_t)size; + tp->ftt_segment = FASTTRAP_SEG_NONE; + + /* + * Find the start of the instruction's opcode by processing any + * legacy prefixes. + */ + for (;;) { + seg = 0; + switch (instr[start]) { + case FASTTRAP_PREFIX_SS: + seg++; + RT_FALL_THRU(); + case FASTTRAP_PREFIX_GS: + seg++; + RT_FALL_THRU(); + case FASTTRAP_PREFIX_FS: + seg++; + RT_FALL_THRU(); + case FASTTRAP_PREFIX_ES: + seg++; + RT_FALL_THRU(); + case FASTTRAP_PREFIX_DS: + seg++; + RT_FALL_THRU(); + case FASTTRAP_PREFIX_CS: + seg++; + RT_FALL_THRU(); + case FASTTRAP_PREFIX_OPERAND: + case FASTTRAP_PREFIX_ADDRESS: + case FASTTRAP_PREFIX_LOCK: + case FASTTRAP_PREFIX_REP: + case FASTTRAP_PREFIX_REPNE: + if (seg != 0) { + /* + * It's illegal for an instruction to specify + * two segment prefixes -- give up on this + * illegal instruction. + */ + if (tp->ftt_segment != FASTTRAP_SEG_NONE) + return (-1); + + tp->ftt_segment = seg; + } + start++; + continue; + } + break; + } + +#ifdef __amd64 + /* + * Identify the REX prefix on 64-bit processes. + */ + if (p->p_model == DATAMODEL_LP64 && (instr[start] & 0xf0) == 0x40) + rex = instr[start++]; +#endif + + /* + * Now that we're pretty sure that the instruction is okay, copy the + * valid part to the tracepoint. 
+ */ + bcopy(instr, tp->ftt_instr, FASTTRAP_MAX_INSTR_SIZE); + + tp->ftt_type = FASTTRAP_T_COMMON; + if (instr[start] == FASTTRAP_2_BYTE_OP) { + switch (instr[start + 1]) { + case FASTTRAP_0F_JO: + case FASTTRAP_0F_JNO: + case FASTTRAP_0F_JB: + case FASTTRAP_0F_JAE: + case FASTTRAP_0F_JE: + case FASTTRAP_0F_JNE: + case FASTTRAP_0F_JBE: + case FASTTRAP_0F_JA: + case FASTTRAP_0F_JS: + case FASTTRAP_0F_JNS: + case FASTTRAP_0F_JP: + case FASTTRAP_0F_JNP: + case FASTTRAP_0F_JL: + case FASTTRAP_0F_JGE: + case FASTTRAP_0F_JLE: + case FASTTRAP_0F_JG: + tp->ftt_type = FASTTRAP_T_JCC; + tp->ftt_code = (instr[start + 1] & 0x0f) | FASTTRAP_JO; + tp->ftt_dest = pc + tp->ftt_size + + /* LINTED - alignment */ + *(int32_t *)&instr[start + 2]; + break; + } + } else if (instr[start] == FASTTRAP_GROUP5_OP) { + uint_t mod = FASTTRAP_MODRM_MOD(instr[start + 1]); + uint_t reg = FASTTRAP_MODRM_REG(instr[start + 1]); + uint_t rm = FASTTRAP_MODRM_RM(instr[start + 1]); + + if (reg == 2 || reg == 4) { + uint_t i, sz; + + if (reg == 2) + tp->ftt_type = FASTTRAP_T_CALL; + else + tp->ftt_type = FASTTRAP_T_JMP; + + if (mod == 3) + tp->ftt_code = 2; + else + tp->ftt_code = 1; + + ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0); + + /* + * See AMD x86-64 Architecture Programmer's Manual + * Volume 3, Section 1.2.7, Table 1-12, and + * Appendix A.3.1, Table A-15. + */ + if (mod != 3 && rm == 4) { + uint8_t sib = instr[start + 2]; + uint_t index = FASTTRAP_SIB_INDEX(sib); + uint_t base = FASTTRAP_SIB_BASE(sib); + + tp->ftt_scale = FASTTRAP_SIB_SCALE(sib); + + tp->ftt_index = (index == 4) ? + FASTTRAP_NOREG : + regmap[index | (FASTTRAP_REX_X(rex) << 3)]; + tp->ftt_base = (mod == 0 && base == 5) ? + FASTTRAP_NOREG : + regmap[base | (FASTTRAP_REX_B(rex) << 3)]; + + i = 3; + sz = mod == 1 ? 1 : 4; + } else { + /* + * In 64-bit mode, mod == 0 and r/m == 5 + * denotes %rip-relative addressing; in 32-bit + * mode, the base register isn't used. In both + * modes, there is a 32-bit operand. 
+ */ + if (mod == 0 && rm == 5) { +#ifdef __amd64 + if (p->p_model == DATAMODEL_LP64) + tp->ftt_base = REG_RIP; + else +#endif + tp->ftt_base = FASTTRAP_NOREG; + sz = 4; + } else { + uint8_t base = rm | + (FASTTRAP_REX_B(rex) << 3); + + tp->ftt_base = regmap[base]; + sz = mod == 1 ? 1 : mod == 2 ? 4 : 0; + } + tp->ftt_index = FASTTRAP_NOREG; + i = 2; + } + + if (sz == 1) { + tp->ftt_dest = *(int8_t *)&instr[start + i]; + } else if (sz == 4) { + /* LINTED - alignment */ + tp->ftt_dest = *(int32_t *)&instr[start + i]; + } else { + tp->ftt_dest = 0; + } + } + } else { + switch (instr[start]) { + case FASTTRAP_RET: + tp->ftt_type = FASTTRAP_T_RET; + break; + + case FASTTRAP_RET16: + tp->ftt_type = FASTTRAP_T_RET16; + /* LINTED - alignment */ + tp->ftt_dest = *(uint16_t *)&instr[start + 1]; + break; + + case FASTTRAP_JO: + case FASTTRAP_JNO: + case FASTTRAP_JB: + case FASTTRAP_JAE: + case FASTTRAP_JE: + case FASTTRAP_JNE: + case FASTTRAP_JBE: + case FASTTRAP_JA: + case FASTTRAP_JS: + case FASTTRAP_JNS: + case FASTTRAP_JP: + case FASTTRAP_JNP: + case FASTTRAP_JL: + case FASTTRAP_JGE: + case FASTTRAP_JLE: + case FASTTRAP_JG: + tp->ftt_type = FASTTRAP_T_JCC; + tp->ftt_code = instr[start]; + tp->ftt_dest = pc + tp->ftt_size + + (int8_t)instr[start + 1]; + break; + + case FASTTRAP_LOOPNZ: + case FASTTRAP_LOOPZ: + case FASTTRAP_LOOP: + tp->ftt_type = FASTTRAP_T_LOOP; + tp->ftt_code = instr[start]; + tp->ftt_dest = pc + tp->ftt_size + + (int8_t)instr[start + 1]; + break; + + case FASTTRAP_JCXZ: + tp->ftt_type = FASTTRAP_T_JCXZ; + tp->ftt_dest = pc + tp->ftt_size + + (int8_t)instr[start + 1]; + break; + + case FASTTRAP_CALL: + tp->ftt_type = FASTTRAP_T_CALL; + tp->ftt_dest = pc + tp->ftt_size + + /* LINTED - alignment */ + *(int32_t *)&instr[start + 1]; + tp->ftt_code = 0; + break; + + case FASTTRAP_JMP32: + tp->ftt_type = FASTTRAP_T_JMP; + tp->ftt_dest = pc + tp->ftt_size + + /* LINTED - alignment */ + *(int32_t *)&instr[start + 1]; + break; + case FASTTRAP_JMP8: + 
tp->ftt_type = FASTTRAP_T_JMP; + tp->ftt_dest = pc + tp->ftt_size + + (int8_t)instr[start + 1]; + break; + + case FASTTRAP_PUSHL_EBP: + if (start == 0) + tp->ftt_type = FASTTRAP_T_PUSHL_EBP; + break; + + case FASTTRAP_NOP: +#ifdef __amd64 + ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0); + + /* + * On amd64 we have to be careful not to confuse a nop + * (actually xchgl %eax, %eax) with an instruction using + * the same opcode, but that does something different + * (e.g. xchgl %r8d, %eax or xchgq %r8, %rax). + */ + if (FASTTRAP_REX_B(rex) == 0) +#endif + tp->ftt_type = FASTTRAP_T_NOP; + break; + + case FASTTRAP_INT3: + /* + * The pid provider shares the int3 trap with debugger + * breakpoints so we can't instrument them. + */ + ASSERT(instr[start] == FASTTRAP_INSTR); + return (-1); + + case FASTTRAP_INT: + /* + * Interrupts seem like they could be traced with + * no negative implications, but it's possible that + * a thread could be redirected by the trap handling + * code which would eventually return to the + * instruction after the interrupt. If the interrupt + * were in our scratch space, the subsequent + * instruction might be overwritten before we return. + * Accordingly we refuse to instrument any interrupt. + */ + return (-1); + } + } + +#ifdef __amd64 + if (p->p_model == DATAMODEL_LP64 && tp->ftt_type == FASTTRAP_T_COMMON) { + /* + * If the process is 64-bit and the instruction type is still + * FASTTRAP_T_COMMON -- meaning we're going to copy it out and + * execute it -- we need to watch for %rip-relative + * addressing mode. See the portion of fasttrap_pid_probe() + * below where we handle tracepoints with type + * FASTTRAP_T_COMMON for how we emulate instructions that + * employ %rip-relative addressing. 
+ */ + if (rmindex != -1) { + uint_t mod = FASTTRAP_MODRM_MOD(instr[rmindex]); + uint_t reg = FASTTRAP_MODRM_REG(instr[rmindex]); + uint_t rm = FASTTRAP_MODRM_RM(instr[rmindex]); + + ASSERT(rmindex > start); + + if (mod == 0 && rm == 5) { + /* + * We need to be sure to avoid other + * registers used by this instruction. While + * the reg field may determine the op code + * rather than denoting a register, assuming + * that it denotes a register is always safe. + * We leave the REX field intact and use + * whatever value's there for simplicity. + */ + if (reg != 0) { + tp->ftt_ripmode = FASTTRAP_RIP_1 | + (FASTTRAP_RIP_X * + FASTTRAP_REX_B(rex)); + rm = 0; + } else { + tp->ftt_ripmode = FASTTRAP_RIP_2 | + (FASTTRAP_RIP_X * + FASTTRAP_REX_B(rex)); + rm = 1; + } + + tp->ftt_modrm = tp->ftt_instr[rmindex]; + tp->ftt_instr[rmindex] = + FASTTRAP_MODRM(2, reg, rm); + } + } + } +#endif + + return (0); +} + +int +fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp) +{ + fasttrap_instr_t instr = FASTTRAP_INSTR; + + if (uwrite(p, &instr, 1, tp->ftt_pc) != 0) + return (-1); + + return (0); +} + +int +fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp) +{ + uint8_t instr; + + /* + * Distinguish between read or write failures and a changed + * instruction. 
+ */ + if (uread(p, &instr, 1, tp->ftt_pc) != 0) + return (0); + if (instr != FASTTRAP_INSTR) + return (0); + if (uwrite(p, &tp->ftt_instr[0], 1, tp->ftt_pc) != 0) + return (-1); + + return (0); +} + +#ifdef __amd64 +static uintptr_t +fasttrap_fulword_noerr(const void *uaddr) +{ + uintptr_t ret; + + if (fasttrap_fulword(uaddr, &ret) == 0) + return (ret); + + return (0); +} +#endif + +static uint32_t +fasttrap_fuword32_noerr(const void *uaddr) +{ + uint32_t ret; + + if (fasttrap_fuword32(uaddr, &ret) == 0) + return (ret); + + return (0); +} + +static void +fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid, + uintptr_t new_pc) +{ + fasttrap_tracepoint_t *tp; + fasttrap_bucket_t *bucket; + fasttrap_id_t *id; + kmutex_t *pid_mtx; + + pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock; + mutex_enter(pid_mtx); + bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; + + for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { + if (pid == tp->ftt_pid && pc == tp->ftt_pc && + tp->ftt_proc->ftpc_acount != 0) + break; + } + + /* + * Don't sweat it if we can't find the tracepoint again; unlike + * when we're in fasttrap_pid_probe(), finding the tracepoint here + * is not essential to the correct execution of the process. + */ + if (tp == NULL) { + mutex_exit(pid_mtx); + return; + } + + for (id = tp->ftt_retids; id != NULL; id = id->fti_next) { + /* + * If there's a branch that could act as a return site, we + * need to trace it, and check here if the program counter is + * external to the function. 
+ */ + if (tp->ftt_type != FASTTRAP_T_RET && + tp->ftt_type != FASTTRAP_T_RET16 && + new_pc - id->fti_probe->ftp_faddr < + id->fti_probe->ftp_fsize) + continue; + + dtrace_probe(id->fti_probe->ftp_id, + pc - id->fti_probe->ftp_faddr, + rp->r_r0, rp->r_r1, 0, 0); + } + + mutex_exit(pid_mtx); +} + +static void +fasttrap_sigsegv(proc_t *p, kthread_t *t, uintptr_t addr) +{ + sigqueue_t *sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); + + sqp->sq_info.si_signo = SIGSEGV; + sqp->sq_info.si_code = SEGV_MAPERR; + sqp->sq_info.si_addr = (caddr_t)addr; + + mutex_enter(&p->p_lock); + sigaddqa(p, t, sqp); + mutex_exit(&p->p_lock); + + if (t != NULL) + aston(t); +} + +#ifdef __amd64 +static void +fasttrap_usdt_args64(fasttrap_probe_t *probe, struct regs *rp, int argc, + uintptr_t *argv) +{ + int i, x, cap = MIN(argc, probe->ftp_nargs); + uintptr_t *stack = (uintptr_t *)rp->r_sp; + + for (i = 0; i < cap; i++) { + x = probe->ftp_argmap[i]; + + if (x < 6) + argv[i] = (&rp->r_rdi)[x]; + else + argv[i] = fasttrap_fulword_noerr(&stack[x]); + } + + for (; i < argc; i++) { + argv[i] = 0; + } +} +#endif + +static void +fasttrap_usdt_args32(fasttrap_probe_t *probe, struct regs *rp, int argc, + uint32_t *argv) +{ + int i, x, cap = MIN(argc, probe->ftp_nargs); + uint32_t *stack = (uint32_t *)rp->r_sp; + + for (i = 0; i < cap; i++) { + x = probe->ftp_argmap[i]; + + argv[i] = fasttrap_fuword32_noerr(&stack[x]); + } + + for (; i < argc; i++) { + argv[i] = 0; + } +} + +static int +fasttrap_do_seg(fasttrap_tracepoint_t *tp, struct regs *rp, uintptr_t *addr) +{ + proc_t *p = curproc; + user_desc_t *desc; + uint16_t sel, ndx, type; + uintptr_t limit; + + switch (tp->ftt_segment) { + case FASTTRAP_SEG_CS: + sel = rp->r_cs; + break; + case FASTTRAP_SEG_DS: + sel = rp->r_ds; + break; + case FASTTRAP_SEG_ES: + sel = rp->r_es; + break; + case FASTTRAP_SEG_FS: + sel = rp->r_fs; + break; + case FASTTRAP_SEG_GS: + sel = rp->r_gs; + break; + case FASTTRAP_SEG_SS: + sel = rp->r_ss; + break; + } + + /* + 
* Make sure the given segment register specifies a user priority + * selector rather than a kernel selector. + */ + if (!SELISUPL(sel)) + return (-1); + + ndx = SELTOIDX(sel); + + /* + * Check the bounds and grab the descriptor out of the specified + * descriptor table. + */ + if (SELISLDT(sel)) { + if (ndx > p->p_ldtlimit) + return (-1); + + desc = p->p_ldt + ndx; + + } else { + if (ndx >= NGDT) + return (-1); + + desc = cpu_get_gdt() + ndx; + } + + /* + * The descriptor must have user privilege level and it must be + * present in memory. + */ + if (desc->usd_dpl != SEL_UPL || desc->usd_p != 1) + return (-1); + + type = desc->usd_type; + + /* + * If the S bit in the type field is not set, this descriptor can + * only be used in system context. + */ + if ((type & 0x10) != 0x10) + return (-1); + + limit = USEGD_GETLIMIT(desc) * (desc->usd_gran ? PAGESIZE : 1); + + if (tp->ftt_segment == FASTTRAP_SEG_CS) { + /* + * The code/data bit and readable bit must both be set. + */ + if ((type & 0xa) != 0xa) + return (-1); + + if (*addr > limit) + return (-1); + } else { + /* + * The code/data bit must be clear. + */ + if ((type & 0x8) != 0) + return (-1); + + /* + * If the expand-down bit is clear, we just check the limit as + * it would naturally be applied. Otherwise, we need to check + * that the address is the range [limit + 1 .. 0xffff] or + * [limit + 1 ... 0xffffffff] depending on if the default + * operand size bit is set. 
+ */ + if ((type & 0x4) == 0) { + if (*addr > limit) + return (-1); + } else if (desc->usd_def32) { + if (*addr < limit + 1 || 0xffff < *addr) + return (-1); + } else { + if (*addr < limit + 1 || 0xffffffff < *addr) + return (-1); + } + } + + *addr += USEGD_GETBASE(desc); + + return (0); +} + +int +fasttrap_pid_probe(struct regs *rp) +{ + proc_t *p = curproc; + uintptr_t pc = rp->r_pc - 1, new_pc = 0; + fasttrap_bucket_t *bucket; + kmutex_t *pid_mtx; + fasttrap_tracepoint_t *tp, tp_local; + pid_t pid; + dtrace_icookie_t cookie; + uint_t is_enabled = 0; + + /* + * It's possible that a user (in a veritable orgy of bad planning) + * could redirect this thread's flow of control before it reached the + * return probe fasttrap. In this case we need to kill the process + * since it's in a unrecoverable state. + */ + if (curthread->t_dtrace_step) { + ASSERT(curthread->t_dtrace_on); + fasttrap_sigtrap(p, curthread, pc); + return (0); + } + + /* + * Clear all user tracing flags. + */ + curthread->t_dtrace_ft = 0; + curthread->t_dtrace_pc = 0; + curthread->t_dtrace_npc = 0; + curthread->t_dtrace_scrpc = 0; + curthread->t_dtrace_astpc = 0; +#ifdef __amd64 + curthread->t_dtrace_regv = 0; +#endif + + /* + * Treat a child created by a call to vfork(2) as if it were its + * parent. We know that there's only one thread of control in such a + * process: this one. + */ + while (p->p_flag & SVFORK) { + p = p->p_parent; + } + + pid = p->p_pid; + pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock; + mutex_enter(pid_mtx); + bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; + + /* + * Lookup the tracepoint that the process just hit. 
+ */ + for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { + if (pid == tp->ftt_pid && pc == tp->ftt_pc && + tp->ftt_proc->ftpc_acount != 0) + break; + } + + /* + * If we couldn't find a matching tracepoint, either a tracepoint has + * been inserted without using the pid<pid> ioctl interface (see + * fasttrap_ioctl), or somehow we have mislaid this tracepoint. + */ + if (tp == NULL) { + mutex_exit(pid_mtx); + return (-1); + } + + /* + * Set the program counter to the address of the traced instruction + * so that it looks right in ustack() output. + */ + rp->r_pc = pc; + + if (tp->ftt_ids != NULL) { + fasttrap_id_t *id; + +#ifdef __amd64 + if (p->p_model == DATAMODEL_LP64) { + for (id = tp->ftt_ids; id != NULL; id = id->fti_next) { + fasttrap_probe_t *probe = id->fti_probe; + + if (id->fti_ptype == DTFTP_ENTRY) { + /* + * We note that this was an entry + * probe to help ustack() find the + * first caller. + */ + cookie = dtrace_interrupt_disable(); + DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY); + dtrace_probe(probe->ftp_id, rp->r_rdi, + rp->r_rsi, rp->r_rdx, rp->r_rcx, + rp->r_r8); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY); + dtrace_interrupt_enable(cookie); + } else if (id->fti_ptype == DTFTP_IS_ENABLED) { + /* + * Note that in this case, we don't + * call dtrace_probe() since it's only + * an artificial probe meant to change + * the flow of control so that it + * encounters the true probe. + */ + is_enabled = 1; + } else if (probe->ftp_argmap == NULL) { + dtrace_probe(probe->ftp_id, rp->r_rdi, + rp->r_rsi, rp->r_rdx, rp->r_rcx, + rp->r_r8); + } else { + uintptr_t t[5]; + + fasttrap_usdt_args64(probe, rp, + sizeof (t) / sizeof (t[0]), t); + + dtrace_probe(probe->ftp_id, t[0], t[1], + t[2], t[3], t[4]); + } + } + } else { +#endif + uintptr_t s0, s1, s2, s3, s4, s5; + uint32_t *stack = (uint32_t *)rp->r_sp; + + /* + * In 32-bit mode, all arguments are passed on the + * stack. 
If this is a function entry probe, we need + * to skip the first entry on the stack as it + * represents the return address rather than a + * parameter to the function. + */ + s0 = fasttrap_fuword32_noerr(&stack[0]); + s1 = fasttrap_fuword32_noerr(&stack[1]); + s2 = fasttrap_fuword32_noerr(&stack[2]); + s3 = fasttrap_fuword32_noerr(&stack[3]); + s4 = fasttrap_fuword32_noerr(&stack[4]); + s5 = fasttrap_fuword32_noerr(&stack[5]); + + for (id = tp->ftt_ids; id != NULL; id = id->fti_next) { + fasttrap_probe_t *probe = id->fti_probe; + + if (id->fti_ptype == DTFTP_ENTRY) { + /* + * We note that this was an entry + * probe to help ustack() find the + * first caller. + */ + cookie = dtrace_interrupt_disable(); + DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY); + dtrace_probe(probe->ftp_id, s1, s2, + s3, s4, s5); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY); + dtrace_interrupt_enable(cookie); + } else if (id->fti_ptype == DTFTP_IS_ENABLED) { + /* + * Note that in this case, we don't + * call dtrace_probe() since it's only + * an artificial probe meant to change + * the flow of control so that it + * encounters the true probe. + */ + is_enabled = 1; + } else if (probe->ftp_argmap == NULL) { + dtrace_probe(probe->ftp_id, s0, s1, + s2, s3, s4); + } else { + uint32_t t[5]; + + fasttrap_usdt_args32(probe, rp, + sizeof (t) / sizeof (t[0]), t); + + dtrace_probe(probe->ftp_id, t[0], t[1], + t[2], t[3], t[4]); + } + } +#ifdef __amd64 + } +#endif + } + + /* + * We're about to do a bunch of work so we cache a local copy of + * the tracepoint to emulate the instruction, and then find the + * tracepoint again later if we need to light up any return probes. + */ + tp_local = *tp; + mutex_exit(pid_mtx); + tp = &tp_local; + + /* + * Set the program counter to appear as though the traced instruction + * had completely executed. This ensures that fasttrap_getreg() will + * report the expected value for REG_RIP. 
+ */ + rp->r_pc = pc + tp->ftt_size; + + /* + * If there's an is-enabled probe connected to this tracepoint it + * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax' + * instruction that was placed there by DTrace when the binary was + * linked. As this probe is, in fact, enabled, we need to stuff 1 + * into %eax or %rax. Accordingly, we can bypass all the instruction + * emulation logic since we know the inevitable result. It's possible + * that a user could construct a scenario where the 'is-enabled' + * probe was on some other instruction, but that would be a rather + * exotic way to shoot oneself in the foot. + */ + if (is_enabled) { + rp->r_r0 = 1; + new_pc = rp->r_pc; + goto done; + } + + /* + * We emulate certain types of instructions to ensure correctness + * (in the case of position dependent instructions) or optimize + * common cases. The rest we have the thread execute back in user- + * land. + */ + switch (tp->ftt_type) { + case FASTTRAP_T_RET: + case FASTTRAP_T_RET16: + { + uintptr_t dst; + uintptr_t addr; + int ret; + + /* + * We have to emulate _every_ facet of the behavior of a ret + * instruction including what happens if the load from %esp + * fails; in that case, we send a SIGSEGV. 
+ */ +#ifdef __amd64 + if (p->p_model == DATAMODEL_NATIVE) { +#endif + ret = fasttrap_fulword((void *)rp->r_sp, &dst); + addr = rp->r_sp + sizeof (uintptr_t); +#ifdef __amd64 + } else { + uint32_t dst32; + ret = fasttrap_fuword32((void *)rp->r_sp, &dst32); + dst = dst32; + addr = rp->r_sp + sizeof (uint32_t); + } +#endif + + if (ret == -1) { + fasttrap_sigsegv(p, curthread, rp->r_sp); + new_pc = pc; + break; + } + + if (tp->ftt_type == FASTTRAP_T_RET16) + addr += tp->ftt_dest; + + rp->r_sp = addr; + new_pc = dst; + break; + } + + case FASTTRAP_T_JCC: + { + uint_t taken; + + switch (tp->ftt_code) { + case FASTTRAP_JO: + taken = (rp->r_ps & FASTTRAP_EFLAGS_OF) != 0; + break; + case FASTTRAP_JNO: + taken = (rp->r_ps & FASTTRAP_EFLAGS_OF) == 0; + break; + case FASTTRAP_JB: + taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) != 0; + break; + case FASTTRAP_JAE: + taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) == 0; + break; + case FASTTRAP_JE: + taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0; + break; + case FASTTRAP_JNE: + taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0; + break; + case FASTTRAP_JBE: + taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) != 0 || + (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0; + break; + case FASTTRAP_JA: + taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) == 0 && + (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0; + break; + case FASTTRAP_JS: + taken = (rp->r_ps & FASTTRAP_EFLAGS_SF) != 0; + break; + case FASTTRAP_JNS: + taken = (rp->r_ps & FASTTRAP_EFLAGS_SF) == 0; + break; + case FASTTRAP_JP: + taken = (rp->r_ps & FASTTRAP_EFLAGS_PF) != 0; + break; + case FASTTRAP_JNP: + taken = (rp->r_ps & FASTTRAP_EFLAGS_PF) == 0; + break; + case FASTTRAP_JL: + taken = ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) != + ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0); + break; + case FASTTRAP_JGE: + taken = ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) == + ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0); + break; + case FASTTRAP_JLE: + taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0 || + ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) != + ((rp->r_ps & 
FASTTRAP_EFLAGS_OF) == 0); + break; + case FASTTRAP_JG: + taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0 && + ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) == + ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0); + break; + + } + + if (taken) + new_pc = tp->ftt_dest; + else + new_pc = pc + tp->ftt_size; + break; + } + + case FASTTRAP_T_LOOP: + { + uint_t taken; +#ifdef __amd64 + greg_t cx = rp->r_rcx--; +#else + greg_t cx = rp->r_ecx--; +#endif + + switch (tp->ftt_code) { + case FASTTRAP_LOOPNZ: + taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0 && + cx != 0; + break; + case FASTTRAP_LOOPZ: + taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0 && + cx != 0; + break; + case FASTTRAP_LOOP: + taken = (cx != 0); + break; + } + + if (taken) + new_pc = tp->ftt_dest; + else + new_pc = pc + tp->ftt_size; + break; + } + + case FASTTRAP_T_JCXZ: + { +#ifdef __amd64 + greg_t cx = rp->r_rcx; +#else + greg_t cx = rp->r_ecx; +#endif + + if (cx == 0) + new_pc = tp->ftt_dest; + else + new_pc = pc + tp->ftt_size; + break; + } + + case FASTTRAP_T_PUSHL_EBP: + { + int ret; + uintptr_t addr; +#ifdef __amd64 + if (p->p_model == DATAMODEL_NATIVE) { +#endif + addr = rp->r_sp - sizeof (uintptr_t); + ret = fasttrap_sulword((void *)addr, rp->r_fp); +#ifdef __amd64 + } else { + addr = rp->r_sp - sizeof (uint32_t); + ret = fasttrap_suword32((void *)addr, + (uint32_t)rp->r_fp); + } +#endif + + if (ret == -1) { + fasttrap_sigsegv(p, curthread, addr); + new_pc = pc; + break; + } + + rp->r_sp = addr; + new_pc = pc + tp->ftt_size; + break; + } + + case FASTTRAP_T_NOP: + new_pc = pc + tp->ftt_size; + break; + + case FASTTRAP_T_JMP: + case FASTTRAP_T_CALL: + if (tp->ftt_code == 0) { + new_pc = tp->ftt_dest; + } else { + uintptr_t value, addr = tp->ftt_dest; + + if (tp->ftt_base != FASTTRAP_NOREG) + addr += fasttrap_getreg(rp, tp->ftt_base); + if (tp->ftt_index != FASTTRAP_NOREG) + addr += fasttrap_getreg(rp, tp->ftt_index) << + tp->ftt_scale; + + if (tp->ftt_code == 1) { + /* + * If there's a segment prefix for this + * 
instruction, we'll need to check permissions + * and bounds on the given selector, and adjust + * the address accordingly. + */ + if (tp->ftt_segment != FASTTRAP_SEG_NONE && + fasttrap_do_seg(tp, rp, &addr) != 0) { + fasttrap_sigsegv(p, curthread, addr); + new_pc = pc; + break; + } + +#ifdef __amd64 + if (p->p_model == DATAMODEL_NATIVE) { +#endif + if (fasttrap_fulword((void *)addr, + &value) == -1) { + fasttrap_sigsegv(p, curthread, + addr); + new_pc = pc; + break; + } + new_pc = value; +#ifdef __amd64 + } else { + uint32_t value32; + addr = (uintptr_t)(uint32_t)addr; + if (fasttrap_fuword32((void *)addr, + &value32) == -1) { + fasttrap_sigsegv(p, curthread, + addr); + new_pc = pc; + break; + } + new_pc = value32; + } +#endif + } else { + new_pc = addr; + } + } + + /* + * If this is a call instruction, we need to push the return + * address onto the stack. If this fails, we send the process + * a SIGSEGV and reset the pc to emulate what would happen if + * this instruction weren't traced. + */ + if (tp->ftt_type == FASTTRAP_T_CALL) { + int ret; + uintptr_t addr; +#ifdef __amd64 + if (p->p_model == DATAMODEL_NATIVE) { + addr = rp->r_sp - sizeof (uintptr_t); + ret = fasttrap_sulword((void *)addr, + pc + tp->ftt_size); + } else { +#endif + addr = rp->r_sp - sizeof (uint32_t); + ret = fasttrap_suword32((void *)addr, + (uint32_t)(pc + tp->ftt_size)); +#ifdef __amd64 + } +#endif + + if (ret == -1) { + fasttrap_sigsegv(p, curthread, addr); + new_pc = pc; + break; + } + + rp->r_sp = addr; + } + + break; + + case FASTTRAP_T_COMMON: + { + uintptr_t addr; +#if defined(__amd64) + uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 22]; +#else + uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 7]; +#endif + uint_t i = 0; + klwp_t *lwp = ttolwp(curthread); + + /* + * Compute the address of the ulwp_t and step over the + * ul_self pointer. The method used to store the user-land + * thread pointer is very different on 32- and 64-bit + * kernels. 
+ */ +#if defined(__amd64) + if (p->p_model == DATAMODEL_LP64) { + addr = lwp->lwp_pcb.pcb_fsbase; + addr += sizeof (void *); + } else { + addr = lwp->lwp_pcb.pcb_gsbase; + addr += sizeof (caddr32_t); + } +#else + addr = USEGD_GETBASE(&lwp->lwp_pcb.pcb_gsdesc); + addr += sizeof (void *); +#endif + + /* + * Generic Instruction Tracing + * --------------------------- + * + * This is the layout of the scratch space in the user-land + * thread structure for our generated instructions. + * + * 32-bit mode bytes + * ------------------------ ----- + * a: <original instruction> <= 15 + * jmp <pc + tp->ftt_size> 5 + * b: <original instruction> <= 15 + * int T_DTRACE_RET 2 + * ----- + * <= 37 + * + * 64-bit mode bytes + * ------------------------ ----- + * a: <original instruction> <= 15 + * jmp 0(%rip) 6 + * <pc + tp->ftt_size> 8 + * b: <original instruction> <= 15 + * int T_DTRACE_RET 2 + * ----- + * <= 46 + * + * The %pc is set to a, and curthread->t_dtrace_astpc is set + * to b. If we encounter a signal on the way out of the + * kernel, trap() will set %pc to curthread->t_dtrace_astpc + * so that we execute the original instruction and re-enter + * the kernel rather than redirecting to the next instruction. + * + * If there are return probes (so we know that we're going to + * need to reenter the kernel after executing the original + * instruction), the scratch space will just contain the + * original instruction followed by an interrupt -- the same + * data as at b. + * + * %rip-relative Addressing + * ------------------------ + * + * There's a further complication in 64-bit mode due to %rip- + * relative addressing.
While this is clearly a beneficial + * architectural decision for position independent code, it's + * hard not to see it as a personal attack against the pid + * provider since before there was a relatively small set of + * instructions to emulate; with %rip-relative addressing, + * almost every instruction can potentially depend on the + * address at which it's executed. Rather than emulating + * the broad spectrum of instructions that can now be + * position dependent, we emulate jumps and others as in + * 32-bit mode, and take a different tack for instructions + * using %rip-relative addressing. + * + * For every instruction that uses the ModRM byte, the + * in-kernel disassembler reports its location. We use the + * ModRM byte to identify that an instruction uses + * %rip-relative addressing and to see what other registers + * the instruction uses. To emulate those instructions, + * we modify the instruction to be %rax-relative rather than + * %rip-relative (or %rcx-relative if the instruction uses + * %rax; or %r8- or %r9-relative if the REX.B is present so + * we don't have to rewrite the REX prefix). We then load + * the value that %rip would have been into the scratch + * register and generate an instruction to reset the scratch + * register back to its original value. The instruction + * sequence looks like this: + * + * 64-mode %rip-relative bytes + * ------------------------ ----- + * a: <modified instruction> <= 15 + * movq $<value>, %<scratch> 6 + * jmp 0(%rip) 6 + * <pc + tp->ftt_size> 8 + * b: <modified instruction> <= 15 + * int T_DTRACE_RET 2 + * ----- + * 52 + * + * We set curthread->t_dtrace_regv so that upon receiving + * a signal we can reset the value of the scratch register. 
+ */ + + ASSERT(tp->ftt_size < FASTTRAP_MAX_INSTR_SIZE); + + curthread->t_dtrace_scrpc = addr; + bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size); + i += tp->ftt_size; + +#ifdef __amd64 + if (tp->ftt_ripmode != 0) { + greg_t *reg; + + ASSERT(p->p_model == DATAMODEL_LP64); + ASSERT(tp->ftt_ripmode & + (FASTTRAP_RIP_1 | FASTTRAP_RIP_2)); + + /* + * If this was a %rip-relative instruction, we change + * it to be either a %rax- or %rcx-relative + * instruction (depending on whether those registers + * are used as another operand; or %r8- or %r9- + * relative depending on the value of REX.B). We then + * set that register and generate a movq instruction + * to reset the value. + */ + if (tp->ftt_ripmode & FASTTRAP_RIP_X) + scratch[i++] = FASTTRAP_REX(1, 0, 0, 1); + else + scratch[i++] = FASTTRAP_REX(1, 0, 0, 0); + + if (tp->ftt_ripmode & FASTTRAP_RIP_1) + scratch[i++] = FASTTRAP_MOV_EAX; + else + scratch[i++] = FASTTRAP_MOV_ECX; + + switch (tp->ftt_ripmode) { + case FASTTRAP_RIP_1: + reg = &rp->r_rax; + curthread->t_dtrace_reg = REG_RAX; + break; + case FASTTRAP_RIP_2: + reg = &rp->r_rcx; + curthread->t_dtrace_reg = REG_RCX; + break; + case FASTTRAP_RIP_1 | FASTTRAP_RIP_X: + reg = &rp->r_r8; + curthread->t_dtrace_reg = REG_R8; + break; + case FASTTRAP_RIP_2 | FASTTRAP_RIP_X: + reg = &rp->r_r9; + curthread->t_dtrace_reg = REG_R9; + break; + } + + /* LINTED - alignment */ + *(uint64_t *)&scratch[i] = *reg; + curthread->t_dtrace_regv = *reg; + *reg = pc + tp->ftt_size; + i += sizeof (uint64_t); + } +#endif + + /* + * Generate the branch instruction to what would have + * normally been the subsequent instruction. In 32-bit mode, + * this is just a relative branch; in 64-bit mode this is a + * %rip-relative branch that loads the 64-bit pc value + * immediately after the jmp instruction. 
+ */ +#ifdef __amd64 + if (p->p_model == DATAMODEL_LP64) { + scratch[i++] = FASTTRAP_GROUP5_OP; + scratch[i++] = FASTTRAP_MODRM(0, 4, 5); + /* LINTED - alignment */ + *(uint32_t *)&scratch[i] = 0; + i += sizeof (uint32_t); + /* LINTED - alignment */ + *(uint64_t *)&scratch[i] = pc + tp->ftt_size; + i += sizeof (uint64_t); + } else { +#endif + /* + * Set up the jmp to the next instruction; note that + * the size of the traced instruction cancels out. + */ + scratch[i++] = FASTTRAP_JMP32; + /* LINTED - alignment */ + *(uint32_t *)&scratch[i] = pc - addr - 5; + i += sizeof (uint32_t); +#ifdef __amd64 + } +#endif + + curthread->t_dtrace_astpc = addr + i; + bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size); + i += tp->ftt_size; + scratch[i++] = FASTTRAP_INT; + scratch[i++] = T_DTRACE_RET; + + ASSERT(i <= sizeof (scratch)); + + if (fasttrap_copyout(scratch, (char *)addr, i)) { + fasttrap_sigtrap(p, curthread, pc); + new_pc = pc; + break; + } + + if (tp->ftt_retids != NULL) { + curthread->t_dtrace_step = 1; + curthread->t_dtrace_ret = 1; + new_pc = curthread->t_dtrace_astpc; + } else { + new_pc = curthread->t_dtrace_scrpc; + } + + curthread->t_dtrace_pc = pc; + curthread->t_dtrace_npc = pc + tp->ftt_size; + curthread->t_dtrace_on = 1; + break; + } + + default: + panic("fasttrap: mishandled an instruction"); + } + +done: + /* + * If there were no return probes when we first found the tracepoint, + * we should feel no obligation to honor any return probes that were + * subsequently enabled -- they'll just have to wait until the next + * time around. + */ + if (tp->ftt_retids != NULL) { + /* + * We need to wait until the results of the instruction are + * apparent before invoking any return probes. If this + * instruction was emulated we can just call + * fasttrap_return_common(); if it needs to be executed, we + * need to wait until the user thread returns to the kernel. 
+ */ + if (tp->ftt_type != FASTTRAP_T_COMMON) { + /* + * Set the program counter to the address of the traced + * instruction so that it looks right in ustack() + * output. We had previously set it to the end of the + * instruction to simplify %rip-relative addressing. + */ + rp->r_pc = pc; + + fasttrap_return_common(rp, pc, pid, new_pc); + } else { + ASSERT(curthread->t_dtrace_ret != 0); + ASSERT(curthread->t_dtrace_pc == pc); + ASSERT(curthread->t_dtrace_scrpc != 0); + ASSERT(new_pc == curthread->t_dtrace_astpc); + } + } + + rp->r_pc = new_pc; + + return (0); +} + +int +fasttrap_return_probe(struct regs *rp) +{ + proc_t *p = curproc; + uintptr_t pc = curthread->t_dtrace_pc; + uintptr_t npc = curthread->t_dtrace_npc; + + curthread->t_dtrace_pc = 0; + curthread->t_dtrace_npc = 0; + curthread->t_dtrace_scrpc = 0; + curthread->t_dtrace_astpc = 0; + + /* + * Treat a child created by a call to vfork(2) as if it were its + * parent. We know that there's only one thread of control in such a + * process: this one. + */ + while (p->p_flag & SVFORK) { + p = p->p_parent; + } + + /* + * We set rp->r_pc to the address of the traced instruction so + * that it appears to dtrace_probe() that we're on the original + * instruction, and so that the user can't easily detect our + * complex web of lies. dtrace_return_probe() (our caller) + * will correctly set %pc after we return. 
+ */ + rp->r_pc = pc; + + fasttrap_return_common(rp, pc, p->p_pid, npc); + + return (0); +} + +/*ARGSUSED*/ +uint64_t +fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno, + int aframes) +{ + return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, 1, argno)); +} + +/*ARGSUSED*/ +uint64_t +fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, + int aframes) +{ + return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, 0, argno)); +} + +static ulong_t +fasttrap_getreg(struct regs *rp, uint_t reg) +{ +#ifdef __amd64 + switch (reg) { + case REG_R15: return (rp->r_r15); + case REG_R14: return (rp->r_r14); + case REG_R13: return (rp->r_r13); + case REG_R12: return (rp->r_r12); + case REG_R11: return (rp->r_r11); + case REG_R10: return (rp->r_r10); + case REG_R9: return (rp->r_r9); + case REG_R8: return (rp->r_r8); + case REG_RDI: return (rp->r_rdi); + case REG_RSI: return (rp->r_rsi); + case REG_RBP: return (rp->r_rbp); + case REG_RBX: return (rp->r_rbx); + case REG_RDX: return (rp->r_rdx); + case REG_RCX: return (rp->r_rcx); + case REG_RAX: return (rp->r_rax); + case REG_TRAPNO: return (rp->r_trapno); + case REG_ERR: return (rp->r_err); + case REG_RIP: return (rp->r_rip); + case REG_CS: return (rp->r_cs); + case REG_RFL: return (rp->r_rfl); + case REG_RSP: return (rp->r_rsp); + case REG_SS: return (rp->r_ss); + case REG_FS: return (rp->r_fs); + case REG_GS: return (rp->r_gs); + case REG_DS: return (rp->r_ds); + case REG_ES: return (rp->r_es); + case REG_FSBASE: return (rdmsr(MSR_AMD_FSBASE)); + case REG_GSBASE: return (rdmsr(MSR_AMD_GSBASE)); + } + + panic("dtrace: illegal register constant"); + /*NOTREACHED*/ +#else + if (reg >= _NGREG) + panic("dtrace: illegal register constant"); + + return (((greg_t *)&rp->r_gs)[reg]); +#endif +} diff --git a/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/fbt.c b/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/fbt.c new file mode 100644 index 00000000..b9353bd3 --- /dev/null +++ 
b/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/fbt.c @@ -0,0 +1,849 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + +#include <sys/modctl.h> +#include <sys/dtrace.h> +#include <sys/kobj.h> +#include <sys/stat.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/conf.h> + +#define FBT_PUSHL_EBP 0x55 +#define FBT_MOVL_ESP_EBP0_V0 0x8b +#define FBT_MOVL_ESP_EBP1_V0 0xec +#define FBT_MOVL_ESP_EBP0_V1 0x89 +#define FBT_MOVL_ESP_EBP1_V1 0xe5 +#define FBT_REX_RSP_RBP 0x48 + +#define FBT_POPL_EBP 0x5d +#define FBT_RET 0xc3 +#define FBT_RET_IMM16 0xc2 +#define FBT_LEAVE 0xc9 + +#ifdef __amd64 +#define FBT_PATCHVAL 0xcc +#else +#define FBT_PATCHVAL 0xf0 +#endif + +#define FBT_ENTRY "entry" +#define FBT_RETURN "return" +#define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask) +#define FBT_PROBETAB_SIZE 0x8000 /* 32k entries -- 128K total */ + +typedef struct fbt_probe { + struct fbt_probe *fbtp_hashnext; + uint8_t *fbtp_patchpoint; + int8_t fbtp_rval; + uint8_t fbtp_patchval; + uint8_t fbtp_savedval; + uintptr_t fbtp_roffset; + dtrace_id_t fbtp_id; + char *fbtp_name; + struct modctl *fbtp_ctl; + int fbtp_loadcnt; + int fbtp_symndx; + int fbtp_primary; + struct fbt_probe *fbtp_next; +} fbt_probe_t; + +static dev_info_t *fbt_devi; +static dtrace_provider_id_t fbt_id; +static fbt_probe_t **fbt_probetab; +static int fbt_probetab_size; +static int fbt_probetab_mask; +static int fbt_verbose = 0; + +static int +fbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval) +{ + uintptr_t stack0, stack1, stack2, stack3, stack4; + fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)]; + + for (; fbt != NULL; fbt = fbt->fbtp_hashnext) { + if ((uintptr_t)fbt->fbtp_patchpoint == addr) { + if (fbt->fbtp_roffset == 0) { + int i = 0; + /* + * When accessing the arguments on the stack, + * we must protect against accessing beyond + * the stack. We can safely set NOFAULT here + * -- we know that interrupts are already + * disabled. 
+ */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + CPU->cpu_dtrace_caller = stack[i++]; +#ifdef __amd64 + /* + * On amd64, stack[0] contains the dereferenced + * stack pointer, stack[1] contains savfp, + * stack[2] contains savpc. We want to step + * over these entries. + */ + i += 2; +#endif + stack0 = stack[i++]; + stack1 = stack[i++]; + stack2 = stack[i++]; + stack3 = stack[i++]; + stack4 = stack[i++]; + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | + CPU_DTRACE_BADADDR); + + dtrace_probe(fbt->fbtp_id, stack0, stack1, + stack2, stack3, stack4); + + CPU->cpu_dtrace_caller = NULL; + } else { +#ifdef __amd64 + /* + * On amd64, we instrument the ret, not the + * leave. We therefore need to set the caller + * to assure that the top frame of a stack() + * action is correct. + */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + CPU->cpu_dtrace_caller = stack[0]; + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | + CPU_DTRACE_BADADDR); +#endif + + dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, + rval, 0, 0, 0); + CPU->cpu_dtrace_caller = NULL; + } + + return (fbt->fbtp_rval); + } + } + + return (0); +} + +/*ARGSUSED*/ +static void +fbt_provide_module(void *arg, struct modctl *ctl) +{ + struct module *mp = ctl->mod_mp; + char *str = mp->strings; + int nsyms = mp->nsyms; + Shdr *symhdr = mp->symhdr; + char *modname = ctl->mod_modname; + char *name; + fbt_probe_t *fbt, *retfbt; + size_t symsize; + int i, size; + + /* + * Employees of dtrace and their families are ineligible. Void + * where prohibited. + */ + if (strcmp(modname, "dtrace") == 0) + return; + + if (ctl->mod_requisites != NULL) { + struct modctl_list *list; + + list = (struct modctl_list *)ctl->mod_requisites; + + for (; list != NULL; list = list->modl_next) { + if (strcmp(list->modl_modp->mod_modname, "dtrace") == 0) + return; + } + } + + /* + * KMDB is ineligible for instrumentation -- it may execute in + * any context, including probe context. 
+ */ + if (strcmp(modname, "kmdbmod") == 0) + return; + + if (str == NULL || symhdr == NULL || symhdr->sh_addr == NULL) { + /* + * If this module doesn't (yet) have its string or symbol + * table allocated, clear out. + */ + return; + } + + symsize = symhdr->sh_entsize; + + if (mp->fbt_nentries) { + /* + * This module has some FBT entries allocated; we're afraid + * to screw with it. + */ + return; + } + + for (i = 1; i < nsyms; i++) { + uint8_t *instr, *limit; + Sym *sym = (Sym *)(symhdr->sh_addr + i * symsize); + int j; + + if (ELF_ST_TYPE(sym->st_info) != STT_FUNC) + continue; + + /* + * Weak symbols are not candidates. This could be made to + * work (where weak functions and their underlying function + * appear as two disjoint probes), but it's not simple. + */ + if (ELF_ST_BIND(sym->st_info) == STB_WEAK) + continue; + + name = str + sym->st_name; + + if (strstr(name, "dtrace_") == name && + strstr(name, "dtrace_safe_") != name) { + /* + * Anything beginning with "dtrace_" may be called + * from probe context unless it explicitly indicates + * that it won't be called from probe context by + * using the prefix "dtrace_safe_". + */ + continue; + } + + if (strstr(name, "kdi_") == name || + strstr(name, "_kdi_") != NULL) { + /* + * Any function name beginning with "kdi_" or + * containing the string "_kdi_" is a part of the + * kernel debugger interface and may be called in + * arbitrary context -- including probe context. + */ + continue; + } + + /* + * Due to 4524008, _init and _fini may have a bloated st_size. + * While this bug was fixed quite some time ago, old drivers + * may be lurking. We need to develop a better solution to + * this problem, such that correct _init and _fini functions + * (the vast majority) may be correctly traced. One solution + * may be to scan through the entire symbol table to see if + * any symbol overlaps with _init. If none does, set a bit in + * the module structure that this module has correct _init and + * _fini sizes.
This will cause some pain the first time a + * module is scanned, but at least it would be O(N) instead of + * O(N log N)... + */ + if (strcmp(name, "_init") == 0) + continue; + + if (strcmp(name, "_fini") == 0) + continue; + + /* + * In order to be eligible, the function must begin with the + * following sequence: + * + * pushl %ebp + * movl %esp, %ebp + * + * Note that there are two variants of encodings that generate + * the movl; we must check for both. For 64-bit, we would + * normally insist that a function begin with the following + * sequence: + * + * pushq %rbp + * movq %rsp, %rbp + * + * However, the compiler for 64-bit often splits these two + * instructions -- and the first instruction in the function + * is often not the pushq. As a result, on 64-bit we look + * for any "pushq %rbp" in the function and we instrument + * this with a breakpoint instruction. + */ + instr = (uint8_t *)sym->st_value; + limit = (uint8_t *)(sym->st_value + sym->st_size); + +#ifdef __amd64 + while (instr < limit) { + if (*instr == FBT_PUSHL_EBP) + break; + + if ((size = dtrace_instr_size(instr)) <= 0) + break; + + instr += size; + } + + if (instr >= limit || *instr != FBT_PUSHL_EBP) { + /* + * We either don't save the frame pointer in this + * function, or we ran into some disassembly + * screw-up. Either way, we bail.
+ */ + continue; + } +#else + if (instr[0] != FBT_PUSHL_EBP) + continue; + + if (!(instr[1] == FBT_MOVL_ESP_EBP0_V0 && + instr[2] == FBT_MOVL_ESP_EBP1_V0) && + !(instr[1] == FBT_MOVL_ESP_EBP0_V1 && + instr[2] == FBT_MOVL_ESP_EBP1_V1)) + continue; +#endif + + fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP); + fbt->fbtp_name = name; + fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, + name, FBT_ENTRY, 3, fbt); + fbt->fbtp_patchpoint = instr; + fbt->fbtp_ctl = ctl; + fbt->fbtp_loadcnt = ctl->mod_loadcnt; + fbt->fbtp_rval = DTRACE_INVOP_PUSHL_EBP; + fbt->fbtp_savedval = *instr; + fbt->fbtp_patchval = FBT_PATCHVAL; + + fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)]; + fbt->fbtp_symndx = i; + fbt_probetab[FBT_ADDR2NDX(instr)] = fbt; + + mp->fbt_nentries++; + + retfbt = NULL; +again: + if (instr >= limit) + continue; + + /* + * If this disassembly fails, then we've likely walked off into + * a jump table or some other unsuitable area. Bail out of the + * disassembly now. + */ + if ((size = dtrace_instr_size(instr)) <= 0) + continue; + +#ifdef __amd64 + /* + * We only instrument "ret" on amd64 -- we don't yet instrument + * ret imm16, largely because the compiler doesn't seem to + * (yet) emit them in the kernel... + */ + if (*instr != FBT_RET) { + instr += size; + goto again; + } +#else + if (!(size == 1 && + (*instr == FBT_POPL_EBP || *instr == FBT_LEAVE) && + (*(instr + 1) == FBT_RET || + *(instr + 1) == FBT_RET_IMM16))) { + instr += size; + goto again; + } +#endif + + /* + * We (desperately) want to avoid erroneously instrumenting a + * jump table, especially given that our markers are pretty + * short: two bytes on x86, and just one byte on amd64. 
To + * determine if we're looking at a true instruction sequence + * or an inline jump table that happens to contain the same + * byte sequences, we resort to some heuristic sleaze: we + * treat this instruction as being contained within a pointer, + * and see if that pointer points to within the body of the + * function. If it does, we refuse to instrument it. + */ + for (j = 0; j < sizeof (uintptr_t); j++) { + uintptr_t check = (uintptr_t)instr - j; + uint8_t *ptr; + + if (check < sym->st_value) + break; + + if (check + sizeof (uintptr_t) > (uintptr_t)limit) + continue; + + ptr = *(uint8_t **)check; + + if (ptr >= (uint8_t *)sym->st_value && ptr < limit) { + instr += size; + goto again; + } + } + + /* + * We have a winner! + */ + fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP); + fbt->fbtp_name = name; + + if (retfbt == NULL) { + fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, + name, FBT_RETURN, 3, fbt); + } else { + retfbt->fbtp_next = fbt; + fbt->fbtp_id = retfbt->fbtp_id; + } + + retfbt = fbt; + fbt->fbtp_patchpoint = instr; + fbt->fbtp_ctl = ctl; + fbt->fbtp_loadcnt = ctl->mod_loadcnt; + +#ifndef __amd64 + if (*instr == FBT_POPL_EBP) { + fbt->fbtp_rval = DTRACE_INVOP_POPL_EBP; + } else { + ASSERT(*instr == FBT_LEAVE); + fbt->fbtp_rval = DTRACE_INVOP_LEAVE; + } + fbt->fbtp_roffset = + (uintptr_t)(instr - (uint8_t *)sym->st_value) + 1; + +#else + ASSERT(*instr == FBT_RET); + fbt->fbtp_rval = DTRACE_INVOP_RET; + fbt->fbtp_roffset = + (uintptr_t)(instr - (uint8_t *)sym->st_value); +#endif + + fbt->fbtp_savedval = *instr; + fbt->fbtp_patchval = FBT_PATCHVAL; + fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)]; + fbt->fbtp_symndx = i; + fbt_probetab[FBT_ADDR2NDX(instr)] = fbt; + + mp->fbt_nentries++; + + instr += size; + goto again; + } +} + +/*ARGSUSED*/ +static void +fbt_destroy(void *arg, dtrace_id_t id, void *parg) +{ + fbt_probe_t *fbt = parg, *next, *hash, *last; + struct modctl *ctl = fbt->fbtp_ctl; + int ndx; + + do { + if (ctl != NULL &&
ctl->mod_loadcnt == fbt->fbtp_loadcnt) { + if ((ctl->mod_loadcnt == fbt->fbtp_loadcnt && + ctl->mod_loaded)) { + ((struct module *) + (ctl->mod_mp))->fbt_nentries--; + } + } + + /* + * Now we need to remove this probe from the fbt_probetab. + */ + ndx = FBT_ADDR2NDX(fbt->fbtp_patchpoint); + last = NULL; + hash = fbt_probetab[ndx]; + + while (hash != fbt) { + ASSERT(hash != NULL); + last = hash; + hash = hash->fbtp_hashnext; + } + + if (last != NULL) { + last->fbtp_hashnext = fbt->fbtp_hashnext; + } else { + fbt_probetab[ndx] = fbt->fbtp_hashnext; + } + + next = fbt->fbtp_next; + kmem_free(fbt, sizeof (fbt_probe_t)); + + fbt = next; + } while (fbt != NULL); +} + +/*ARGSUSED*/ +static int +fbt_enable(void *arg, dtrace_id_t id, void *parg) +{ + fbt_probe_t *fbt = parg; + struct modctl *ctl = fbt->fbtp_ctl; + + ctl->mod_nenabled++; + + if (!ctl->mod_loaded) { + if (fbt_verbose) { + cmn_err(CE_NOTE, "fbt is failing for probe %s " + "(module %s unloaded)", + fbt->fbtp_name, ctl->mod_modname); + } + + return (0); + } + + /* + * Now check that our modctl has the expected load count. If it + * doesn't, this module must have been unloaded and reloaded -- and + * we're not going to touch it. 
+ */ + if (ctl->mod_loadcnt != fbt->fbtp_loadcnt) { + if (fbt_verbose) { + cmn_err(CE_NOTE, "fbt is failing for probe %s " + "(module %s reloaded)", + fbt->fbtp_name, ctl->mod_modname); + } + + return (0); + } + + for (; fbt != NULL; fbt = fbt->fbtp_next) + *fbt->fbtp_patchpoint = fbt->fbtp_patchval; + + return (0); +} + +/*ARGSUSED*/ +static void +fbt_disable(void *arg, dtrace_id_t id, void *parg) +{ + fbt_probe_t *fbt = parg; + struct modctl *ctl = fbt->fbtp_ctl; + + ASSERT(ctl->mod_nenabled > 0); + ctl->mod_nenabled--; + + if (!ctl->mod_loaded || (ctl->mod_loadcnt != fbt->fbtp_loadcnt)) + return; + + for (; fbt != NULL; fbt = fbt->fbtp_next) + *fbt->fbtp_patchpoint = fbt->fbtp_savedval; +} + +/*ARGSUSED*/ +static void +fbt_suspend(void *arg, dtrace_id_t id, void *parg) +{ + fbt_probe_t *fbt = parg; + struct modctl *ctl = fbt->fbtp_ctl; + + ASSERT(ctl->mod_nenabled > 0); + + if (!ctl->mod_loaded || (ctl->mod_loadcnt != fbt->fbtp_loadcnt)) + return; + + for (; fbt != NULL; fbt = fbt->fbtp_next) + *fbt->fbtp_patchpoint = fbt->fbtp_savedval; +} + +/*ARGSUSED*/ +static void +fbt_resume(void *arg, dtrace_id_t id, void *parg) +{ + fbt_probe_t *fbt = parg; + struct modctl *ctl = fbt->fbtp_ctl; + + ASSERT(ctl->mod_nenabled > 0); + + if (!ctl->mod_loaded || (ctl->mod_loadcnt != fbt->fbtp_loadcnt)) + return; + + for (; fbt != NULL; fbt = fbt->fbtp_next) + *fbt->fbtp_patchpoint = fbt->fbtp_patchval; +} + +/*ARGSUSED*/ +static void +fbt_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc) +{ + fbt_probe_t *fbt = parg; + struct modctl *ctl = fbt->fbtp_ctl; + struct module *mp = ctl->mod_mp; + ctf_file_t *fp = NULL, *pfp; + ctf_funcinfo_t f; + int error; + ctf_id_t argv[32], type; + int argc = sizeof (argv) / sizeof (ctf_id_t); + const char *parent; + + if (!ctl->mod_loaded || (ctl->mod_loadcnt != fbt->fbtp_loadcnt)) + goto err; + + if (fbt->fbtp_roffset != 0 && desc->dtargd_ndx == 0) { + (void) strcpy(desc->dtargd_native, "int"); + return; + } + + if ((fp 
= ctf_modopen(mp, &error)) == NULL) { + /* + * We have no CTF information for this module -- and therefore + * no args[] information. + */ + goto err; + } + + /* + * If we have a parent container, we must manually import it. + */ + if ((parent = ctf_parent_name(fp)) != NULL) { + struct modctl *mp = &modules; + struct modctl *mod = NULL; + + /* + * We must iterate over all modules to find the module that + * is our parent. + */ + do { + if (strcmp(mp->mod_modname, parent) == 0) { + mod = mp; + break; + } + } while ((mp = mp->mod_next) != &modules); + + if (mod == NULL) + goto err; + + if ((pfp = ctf_modopen(mod->mod_mp, &error)) == NULL) { + goto err; + } + + if (ctf_import(fp, pfp) != 0) { + ctf_close(pfp); + goto err; + } + + ctf_close(pfp); + } + + if (ctf_func_info(fp, fbt->fbtp_symndx, &f) == CTF_ERR) + goto err; + + if (fbt->fbtp_roffset != 0) { + if (desc->dtargd_ndx > 1) + goto err; + + ASSERT(desc->dtargd_ndx == 1); + type = f.ctc_return; + } else { + if (desc->dtargd_ndx + 1 > f.ctc_argc) + goto err; + + if (ctf_func_args(fp, fbt->fbtp_symndx, argc, argv) == CTF_ERR) + goto err; + + type = argv[desc->dtargd_ndx]; + } + + if (ctf_type_name(fp, type, desc->dtargd_native, + DTRACE_ARGTYPELEN) != NULL) { + ctf_close(fp); + return; + } +err: + if (fp != NULL) + ctf_close(fp); + + desc->dtargd_ndx = DTRACE_ARGNONE; +} + +static dtrace_pattr_t fbt_attr = { +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, +}; + +static dtrace_pops_t fbt_pops = { + NULL, + fbt_provide_module, + fbt_enable, + fbt_disable, + fbt_suspend, + fbt_resume, + fbt_getargdesc, + NULL, + NULL, + fbt_destroy +}; + +static void +fbt_cleanup(dev_info_t *devi) +{ + 
dtrace_invop_remove(fbt_invop); + ddi_remove_minor_node(devi, NULL); + kmem_free(fbt_probetab, fbt_probetab_size * sizeof (fbt_probe_t *)); + fbt_probetab = NULL; + fbt_probetab_mask = 0; +} + +static int +fbt_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) +{ + switch (cmd) { + case DDI_ATTACH: + break; + case DDI_RESUME: + return (DDI_SUCCESS); + default: + return (DDI_FAILURE); + } + + if (fbt_probetab_size == 0) + fbt_probetab_size = FBT_PROBETAB_SIZE; + + fbt_probetab_mask = fbt_probetab_size - 1; + fbt_probetab = + kmem_zalloc(fbt_probetab_size * sizeof (fbt_probe_t *), KM_SLEEP); + + dtrace_invop_add(fbt_invop); + + if (ddi_create_minor_node(devi, "fbt", S_IFCHR, 0, + DDI_PSEUDO, NULL) == DDI_FAILURE || + dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_KERNEL, NULL, + &fbt_pops, NULL, &fbt_id) != 0) { + fbt_cleanup(devi); + return (DDI_FAILURE); + } + + ddi_report_dev(devi); + fbt_devi = devi; + + return (DDI_SUCCESS); +} + +static int +fbt_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) +{ + switch (cmd) { + case DDI_DETACH: + break; + case DDI_SUSPEND: + return (DDI_SUCCESS); + default: + return (DDI_FAILURE); + } + + if (dtrace_unregister(fbt_id) != 0) + return (DDI_FAILURE); + + fbt_cleanup(devi); + + return (DDI_SUCCESS); +} + +/*ARGSUSED*/ +static int +fbt_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) +{ + int error; + + switch (infocmd) { + case DDI_INFO_DEVT2DEVINFO: + *result = (void *)fbt_devi; + error = DDI_SUCCESS; + break; + case DDI_INFO_DEVT2INSTANCE: + *result = (void *)0; + error = DDI_SUCCESS; + break; + default: + error = DDI_FAILURE; + } + return (error); +} + +/*ARGSUSED*/ +static int +fbt_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) +{ + return (0); +} + +static struct cb_ops fbt_cb_ops = { + fbt_open, /* open */ + nodev, /* close */ + nulldev, /* strategy */ + nulldev, /* print */ + nodev, /* dump */ + nodev, /* read */ + nodev, /* write */ + nodev, /* ioctl */ + nodev, /* devmap */ + nodev, /* mmap */ + 
nodev, /* segmap */ + nochpoll, /* poll */ + ddi_prop_op, /* cb_prop_op */ + 0, /* streamtab */ + D_NEW | D_MP /* Driver compatibility flag */ +}; + +static struct dev_ops fbt_ops = { + DEVO_REV, /* devo_rev */ + 0, /* refcnt */ + fbt_info, /* get_dev_info */ + nulldev, /* identify */ + nulldev, /* probe */ + fbt_attach, /* attach */ + fbt_detach, /* detach */ + nodev, /* reset */ + &fbt_cb_ops, /* driver operations */ + NULL, /* bus operations */ + nodev, /* dev power */ + ddi_quiesce_not_needed, /* quiesce */ +}; + +/* + * Module linkage information for the kernel. + */ +static struct modldrv modldrv = { + &mod_driverops, /* module type (this is a pseudo driver) */ + "Function Boundary Tracing", /* name of module */ + &fbt_ops, /* driver ops */ +}; + +static struct modlinkage modlinkage = { + MODREV_1, + (void *)&modldrv, + NULL +}; + +int +_init(void) +{ + return (mod_install(&modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +int +_fini(void) +{ + return (mod_remove(&modlinkage)); +} diff --git a/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/fbt.conf b/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/fbt.conf new file mode 100644 index 00000000..8e7cbe1e --- /dev/null +++ b/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/fbt.conf @@ -0,0 +1,28 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2003 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +name="fbt" parent="pseudo" instance=0; diff --git a/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/sdt.c b/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/sdt.c new file mode 100644 index 00000000..38be2233 --- /dev/null +++ b/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/sdt.c @@ -0,0 +1,557 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + +#include <sys/modctl.h> +#include <sys/sunddi.h> +#include <sys/dtrace.h> +#include <sys/kobj.h> +#include <sys/stat.h> +#include <sys/conf.h> +#include <vm/seg_kmem.h> +#include <sys/stack.h> +#include <sys/frame.h> +#include <sys/dtrace_impl.h> +#include <sys/cmn_err.h> +#include <sys/sysmacros.h> +#include <sys/privregs.h> +#include <sys/sdt_impl.h> + +#define SDT_PATCHVAL 0xf0 +#define SDT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & sdt_probetab_mask) +#define SDT_PROBETAB_SIZE 0x1000 /* 4k entries -- 16K total */ + +static dev_info_t *sdt_devi; +static int sdt_verbose = 0; +static sdt_probe_t **sdt_probetab; +static int sdt_probetab_size; +static int sdt_probetab_mask; + +/*ARGSUSED*/ +static int +sdt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax) +{ + uintptr_t stack0, stack1, stack2, stack3, stack4; + int i = 0; + sdt_probe_t *sdt = sdt_probetab[SDT_ADDR2NDX(addr)]; + +#ifdef __amd64 + /* + * On amd64, stack[0] contains the dereferenced stack pointer, + * stack[1] contains savfp, stack[2] contains savpc. We want + * to step over these entries. + */ + i += 3; +#endif + + for (; sdt != NULL; sdt = sdt->sdp_hashnext) { + if ((uintptr_t)sdt->sdp_patchpoint == addr) { + /* + * When accessing the arguments on the stack, we must + * protect against accessing beyond the stack. We can + * safely set NOFAULT here -- we know that interrupts + * are already disabled. 
+ */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + stack0 = stack[i++]; + stack1 = stack[i++]; + stack2 = stack[i++]; + stack3 = stack[i++]; + stack4 = stack[i++]; + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | + CPU_DTRACE_BADADDR); + + dtrace_probe(sdt->sdp_id, stack0, stack1, + stack2, stack3, stack4); + + return (DTRACE_INVOP_NOP); + } + } + + return (0); +} + +/*ARGSUSED*/ +static void +sdt_provide_module(void *arg, struct modctl *ctl) +{ + struct module *mp = ctl->mod_mp; + char *modname = ctl->mod_modname; + sdt_probedesc_t *sdpd; + sdt_probe_t *sdp, *old; + sdt_provider_t *prov; + int len; + + /* + * One for all, and all for one: if we haven't yet registered all of + * our providers, we'll refuse to provide anything. + */ + for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) { + if (prov->sdtp_id == DTRACE_PROVNONE) + return; + } + + if (mp->sdt_nprobes != 0 || (sdpd = mp->sdt_probes) == NULL) + return; + + for (sdpd = mp->sdt_probes; sdpd != NULL; sdpd = sdpd->sdpd_next) { + char *name = sdpd->sdpd_name, *func, *nname; + int i, j; + sdt_provider_t *prov; + ulong_t offs; + dtrace_id_t id; + + for (prov = sdt_providers; prov->sdtp_prefix != NULL; prov++) { + char *prefix = prov->sdtp_prefix; + + if (strncmp(name, prefix, strlen(prefix)) == 0) { + name += strlen(prefix); + break; + } + } + + nname = kmem_alloc(len = strlen(name) + 1, KM_SLEEP); + + for (i = 0, j = 0; name[j] != '\0'; i++) { + if (name[j] == '_' && name[j + 1] == '_') { + nname[i] = '-'; + j += 2; + } else { + nname[i] = name[j++]; + } + } + + nname[i] = '\0'; + + sdp = kmem_zalloc(sizeof (sdt_probe_t), KM_SLEEP); + sdp->sdp_loadcnt = ctl->mod_loadcnt; + sdp->sdp_ctl = ctl; + sdp->sdp_name = nname; + sdp->sdp_namelen = len; + sdp->sdp_provider = prov; + + func = kobj_searchsym(mp, sdpd->sdpd_offset, &offs); + + if (func == NULL) + func = "<unknown>"; + + /* + * We have our provider. Now create the probe. 
+ */ + if ((id = dtrace_probe_lookup(prov->sdtp_id, modname, + func, nname)) != DTRACE_IDNONE) { + old = dtrace_probe_arg(prov->sdtp_id, id); + ASSERT(old != NULL); + + sdp->sdp_next = old->sdp_next; + sdp->sdp_id = id; + old->sdp_next = sdp; + } else { + sdp->sdp_id = dtrace_probe_create(prov->sdtp_id, + modname, func, nname, 3, sdp); + + mp->sdt_nprobes++; + } + + sdp->sdp_hashnext = + sdt_probetab[SDT_ADDR2NDX(sdpd->sdpd_offset)]; + sdt_probetab[SDT_ADDR2NDX(sdpd->sdpd_offset)] = sdp; + + sdp->sdp_patchval = SDT_PATCHVAL; + sdp->sdp_patchpoint = (uint8_t *)sdpd->sdpd_offset; + sdp->sdp_savedval = *sdp->sdp_patchpoint; + } +} + +/*ARGSUSED*/ +static void +sdt_destroy(void *arg, dtrace_id_t id, void *parg) +{ + sdt_probe_t *sdp = parg, *old, *last, *hash; + struct modctl *ctl = sdp->sdp_ctl; + int ndx; + + if (ctl != NULL && ctl->mod_loadcnt == sdp->sdp_loadcnt) { + if ((ctl->mod_loadcnt == sdp->sdp_loadcnt && + ctl->mod_loaded)) { + ((struct module *)(ctl->mod_mp))->sdt_nprobes--; + } + } + + while (sdp != NULL) { + old = sdp; + + /* + * Now we need to remove this probe from the sdt_probetab. + */ + ndx = SDT_ADDR2NDX(sdp->sdp_patchpoint); + last = NULL; + hash = sdt_probetab[ndx]; + + while (hash != sdp) { + ASSERT(hash != NULL); + last = hash; + hash = hash->sdp_hashnext; + } + + if (last != NULL) { + last->sdp_hashnext = sdp->sdp_hashnext; + } else { + sdt_probetab[ndx] = sdp->sdp_hashnext; + } + + kmem_free(sdp->sdp_name, sdp->sdp_namelen); + sdp = sdp->sdp_next; + kmem_free(old, sizeof (sdt_probe_t)); + } +} + +/*ARGSUSED*/ +static int +sdt_enable(void *arg, dtrace_id_t id, void *parg) +{ + sdt_probe_t *sdp = parg; + struct modctl *ctl = sdp->sdp_ctl; + + ctl->mod_nenabled++; + + /* + * If this module has disappeared since we discovered its probes, + * refuse to enable it. 
+ */ + if (!ctl->mod_loaded) { + if (sdt_verbose) { + cmn_err(CE_NOTE, "sdt is failing for probe %s " + "(module %s unloaded)", + sdp->sdp_name, ctl->mod_modname); + } + goto err; + } + + /* + * Now check that our modctl has the expected load count. If it + * doesn't, this module must have been unloaded and reloaded -- and + * we're not going to touch it. + */ + if (ctl->mod_loadcnt != sdp->sdp_loadcnt) { + if (sdt_verbose) { + cmn_err(CE_NOTE, "sdt is failing for probe %s " + "(module %s reloaded)", + sdp->sdp_name, ctl->mod_modname); + } + goto err; + } + + while (sdp != NULL) { + *sdp->sdp_patchpoint = sdp->sdp_patchval; + sdp = sdp->sdp_next; + } +err: + return (0); +} + +/*ARGSUSED*/ +static void +sdt_disable(void *arg, dtrace_id_t id, void *parg) +{ + sdt_probe_t *sdp = parg; + struct modctl *ctl = sdp->sdp_ctl; + + ctl->mod_nenabled--; + + if (!ctl->mod_loaded || ctl->mod_loadcnt != sdp->sdp_loadcnt) + goto err; + + while (sdp != NULL) { + *sdp->sdp_patchpoint = sdp->sdp_savedval; + sdp = sdp->sdp_next; + } + +err: + ; +} + +/*ARGSUSED*/ +uint64_t +sdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) +{ + uintptr_t val; + struct frame *fp = (struct frame *)dtrace_getfp(); + uintptr_t *stack; + int i; +#if defined(__amd64) + /* + * A total of 6 arguments are passed via registers; any argument with + * index of 5 or lower is therefore in a register. + */ + int inreg = 5; +#endif + + for (i = 1; i <= aframes; i++) { + fp = (struct frame *)(fp->fr_savfp); + + if (fp->fr_savpc == (pc_t)dtrace_invop_callsite) { +#if !defined(__amd64) + /* + * If we pass through the invalid op handler, we will + * use the pointer that it passed to the stack as the + * second argument to dtrace_invop() as the pointer to + * the stack. + */ + stack = ((uintptr_t **)&fp[1])[1]; +#else + /* + * In the case of amd64, we will use the pointer to the + * regs structure that was pushed when we took the + * trap. 
To get this structure, we must increment + * beyond the frame structure. If the argument that + * we're seeking is passed on the stack, we'll pull + * the true stack pointer out of the saved registers + * and decrement our argument by the number of + * arguments passed in registers; if the argument + * we're seeking is passed in registers, we can just + * load it directly. + */ + struct regs *rp = (struct regs *)((uintptr_t)&fp[1] + + sizeof (uintptr_t)); + + if (argno <= inreg) { + stack = (uintptr_t *)&rp->r_rdi; + } else { + stack = (uintptr_t *)(rp->r_rsp); + argno -= (inreg + 1); + } +#endif + goto load; + } + } + + /* + * We know that we did not come through a trap to get into + * dtrace_probe() -- the provider simply called dtrace_probe() + * directly. As this is the case, we need to shift the argument + * that we're looking for: the probe ID is the first argument to + * dtrace_probe(), so the argument n will actually be found where + * one would expect to find argument (n + 1). + */ + argno++; + +#if defined(__amd64) + if (argno <= inreg) { + /* + * This shouldn't happen. If the argument is passed in a + * register then it should have been, well, passed in a + * register... 
+ */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return (0); + } + + argno -= (inreg + 1); +#endif + stack = (uintptr_t *)&fp[1]; + +load: + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + val = stack[argno]; + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + + return (val); +} + +static dtrace_pops_t sdt_pops = { + NULL, + sdt_provide_module, + sdt_enable, + sdt_disable, + NULL, + NULL, + sdt_getargdesc, + sdt_getarg, + NULL, + sdt_destroy +}; + +/*ARGSUSED*/ +static int +sdt_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) +{ + sdt_provider_t *prov; + + if (ddi_create_minor_node(devi, "sdt", S_IFCHR, + 0, DDI_PSEUDO, NULL) == DDI_FAILURE) { + cmn_err(CE_NOTE, "/dev/sdt couldn't create minor node"); + ddi_remove_minor_node(devi, NULL); + return (DDI_FAILURE); + } + + ddi_report_dev(devi); + sdt_devi = devi; + + if (sdt_probetab_size == 0) + sdt_probetab_size = SDT_PROBETAB_SIZE; + + sdt_probetab_mask = sdt_probetab_size - 1; + sdt_probetab = + kmem_zalloc(sdt_probetab_size * sizeof (sdt_probe_t *), KM_SLEEP); + dtrace_invop_add(sdt_invop); + + for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) { + if (dtrace_register(prov->sdtp_name, prov->sdtp_attr, + DTRACE_PRIV_KERNEL, NULL, + &sdt_pops, prov, &prov->sdtp_id) != 0) { + cmn_err(CE_WARN, "failed to register sdt provider %s", + prov->sdtp_name); + } + } + + return (DDI_SUCCESS); +} + +/*ARGSUSED*/ +static int +sdt_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ + sdt_provider_t *prov; + + switch (cmd) { + case DDI_DETACH: + break; + + case DDI_SUSPEND: + return (DDI_SUCCESS); + + default: + return (DDI_FAILURE); + } + + for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) { + if (prov->sdtp_id != DTRACE_PROVNONE) { + if (dtrace_unregister(prov->sdtp_id) != 0) + return (DDI_FAILURE); + + prov->sdtp_id = DTRACE_PROVNONE; + } + } + + dtrace_invop_remove(sdt_invop); + kmem_free(sdt_probetab, sdt_probetab_size * sizeof (sdt_probe_t *)); + + return (DDI_SUCCESS); +} + +/*ARGSUSED*/ +static int +sdt_info(dev_info_t 
*dip, ddi_info_cmd_t infocmd, void *arg, void **result) +{ + int error; + + switch (infocmd) { + case DDI_INFO_DEVT2DEVINFO: + *result = (void *)sdt_devi; + error = DDI_SUCCESS; + break; + case DDI_INFO_DEVT2INSTANCE: + *result = (void *)0; + error = DDI_SUCCESS; + break; + default: + error = DDI_FAILURE; + } + return (error); +} + +/*ARGSUSED*/ +static int +sdt_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) +{ + return (0); +} + +static struct cb_ops sdt_cb_ops = { + sdt_open, /* open */ + nodev, /* close */ + nulldev, /* strategy */ + nulldev, /* print */ + nodev, /* dump */ + nodev, /* read */ + nodev, /* write */ + nodev, /* ioctl */ + nodev, /* devmap */ + nodev, /* mmap */ + nodev, /* segmap */ + nochpoll, /* poll */ + ddi_prop_op, /* cb_prop_op */ + 0, /* streamtab */ + D_NEW | D_MP /* Driver compatibility flag */ +}; + +static struct dev_ops sdt_ops = { + DEVO_REV, /* devo_rev, */ + 0, /* refcnt */ + sdt_info, /* get_dev_info */ + nulldev, /* identify */ + nulldev, /* probe */ + sdt_attach, /* attach */ + sdt_detach, /* detach */ + nodev, /* reset */ + &sdt_cb_ops, /* driver operations */ + NULL, /* bus operations */ + nodev, /* dev power */ + ddi_quiesce_not_needed, /* quiesce */ +}; + +/* + * Module linkage information for the kernel. 
+ */ +static struct modldrv modldrv = { + &mod_driverops, /* module type (this is a pseudo driver) */ + "Statically Defined Tracing", /* name of module */ + &sdt_ops, /* driver ops */ +}; + +static struct modlinkage modlinkage = { + MODREV_1, + (void *)&modldrv, + NULL +}; + +int +_init(void) +{ + return (mod_install(&modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +int +_fini(void) +{ + return (mod_remove(&modlinkage)); +} diff --git a/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/sdt.conf b/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/sdt.conf new file mode 100644 index 00000000..bde2149c --- /dev/null +++ b/src/VBox/ExtPacks/VBoxDTrace/onnv/uts/intel/dtrace/sdt.conf @@ -0,0 +1,28 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2003 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#pragma ident "%Z%%M% %I% %E% SMI" + +name="sdt" parent="pseudo" instance=0; |