#ifdef KERNEL
#ifndef _KERNEL
#define _KERNEL
#endif
#endif
#define MACH__POSIX_C_SOURCE_PRIVATE 1
#include <kern/thread.h>
#include <mach/thread_status.h>
#include <mach/vm_param.h>
#include <mach-o/loader.h>
#include <mach-o/nlist.h>
#include <libkern/kernel_mach_header.h>
#include <libkern/OSAtomic.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <miscfs/devfs/devfs.h>
#include <sys/dtrace.h>
#include <sys/dtrace_impl.h>
#include <sys/fbt.h>
#include <sys/dtrace_glue.h>
/*
 * Emulation codes and "skip" distances private to this file.
 *
 * The *_SKIP values are the number of bytes added to the saved instruction
 * pointer once the corresponding instruction has been emulated (see the
 * rip adjustments in fbt_perfCallback).  DTRACE_INVOP_MOV_RSP_RBP and
 * DTRACE_INVOP_POP_RBP are stored in a probe's fbtp_rval and handed back by
 * fbt_invop to select the emulation.  DTRACE_INVOP_MOVL_ESP_EBP,
 * DTRACE_INVOP_MOVL_ESP_EBP_SKIP and DTRACE_INVOP_POP_RBP_SKIP are not
 * referenced by the code visible in this file.
 */
#define DTRACE_INVOP_NOP_SKIP 1
#define DTRACE_INVOP_MOVL_ESP_EBP 10
#define DTRACE_INVOP_MOVL_ESP_EBP_SKIP 2
#define DTRACE_INVOP_MOV_RSP_RBP 11
#define DTRACE_INVOP_MOV_RSP_RBP_SKIP 3
#define DTRACE_INVOP_POP_RBP 12
#define DTRACE_INVOP_POP_RBP_SKIP 1
#define DTRACE_INVOP_LEAVE_SKIP 1
/*
 * Opcode bytes that the prologue/epilogue scanner in fbt_provide_probe
 * compares against.  The names give the instruction each byte (or byte
 * pair) encodes: 0x55 push %rbp, 0x48 0x89 0xe5 mov %rsp,%rbp,
 * 0x5d pop %rbp, 0xc9 leave, 0xc3/0xc2 ret, 0xeb/0xe9/0xea jmp forms.
 * The FBT_MOVL_ESP_EBP* and FBT_PUSHL_EBP/FBT_POPL_EBP names are the
 * 32-bit spellings; active code in this file only uses the RBP variants.
 */
#define FBT_PUSHL_EBP 0x55
#define FBT_MOVL_ESP_EBP0_V0 0x8b
#define FBT_MOVL_ESP_EBP1_V0 0xec
#define FBT_MOVL_ESP_EBP0_V1 0x89
#define FBT_MOVL_ESP_EBP1_V1 0xe5
#define FBT_PUSH_RBP 0x55
#define FBT_REX_RSP_RBP 0x48
#define FBT_MOV_RSP_RBP0 0x89
#define FBT_MOV_RSP_RBP1 0xe5
#define FBT_POP_RBP 0x5d
#define FBT_POPL_EBP 0x5d
#define FBT_RET 0xc3
#define FBT_RET_IMM16 0xc2
#define FBT_LEAVE 0xc9
#define FBT_JMP_SHORT_REL 0xeb
#define FBT_JMP_NEAR_REL 0xe9
#define FBT_JMP_FAR_ABS 0xea
/*
 * Expected encoded lengths, in bytes, of the instructions accepted after a
 * pop %rbp / leave when looking for a return site.  They are compared with
 * the value returned by dtrace_instr_size().
 */
#define FBT_RET_LEN 1
#define FBT_RET_IMM16_LEN 3
#define FBT_JMP_SHORT_REL_LEN 2
#define FBT_JMP_NEAR_REL_LEN 5
#define FBT_JMP_FAR_ABS_LEN 5
/*
 * Byte recorded as every probe's fbtp_patchval.
 * NOTE(review): presumably fbt_enable writes this byte over the patch point
 * so that executing it raises the invalid-opcode trap that
 * fbt_perfCallback services -- confirm against fbt_enable.
 */
#define FBT_PATCHVAL 0xf0
/*
 * Fifth argument passed to dtrace_probe_create() for entry and return
 * probes respectively.
 * NOTE(review): presumably the count of artificial stack frames to skip --
 * confirm against dtrace_probe_create.
 */
#define FBT_AFRAMES_ENTRY 7
#define FBT_AFRAMES_RETURN 6
/* Probe-name component used for function-entry and function-return probes. */
#define FBT_ENTRY "entry"
#define FBT_RETURN "return"
/*
 * Map a patch-point address to its bucket in fbt_probetab: drop the low
 * four address bits, then mask with fbt_probetab_mask.
 */
#define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)
/* Provider state defined elsewhere. */
extern dtrace_provider_id_t fbt_id;
extern fbt_probe_t **fbt_probetab;
extern int fbt_probetab_mask;
/* Forward declaration of the trap callback defined later in this file. */
kern_return_t fbt_perfCallback(int, x86_saved_state_t *, uintptr_t *, __unused int);
/*
 * fbt_invop -- fire the FBT probe (if any) patched at `addr`.
 *
 * addr:  address of the instruction that trapped.
 * state: saved register state; interpreted as an x86_saved_state64_t.
 * rval:  value reported as arg1 of a return probe.
 *
 * Walks the hash chain selected by FBT_ADDR2NDX(addr).  For the first probe
 * whose patch point equals `addr`, calls dtrace_probe() and returns that
 * probe's fbtp_rval (the emulation code for the displaced instruction).
 * Returns 0 when no probe is registered at `addr`.
 */
int
fbt_invop(uintptr_t addr, uintptr_t *state, uintptr_t rval)
{
	fbt_probe_t *probe;

	for (probe = fbt_probetab[FBT_ADDR2NDX(addr)];
	    probe != NULL;
	    probe = probe->fbtp_hashnext) {
		if ((uintptr_t)probe->fbtp_patchpoint != addr)
			continue;

		if (probe->fbtp_roffset != 0) {
			/* Return probe: arg0 is the offset of the return site, arg1 is rval. */
			dtrace_probe(probe->fbtp_id, probe->fbtp_roffset, rval, 0, 0, 0);
		} else {
			/*
			 * Entry probe (roffset is zero).  Record the word stored one
			 * uint64_t above the saved stack pointer as the caller, then
			 * pass the first five register arguments to the probe.
			 */
			x86_saved_state64_t *regs = (x86_saved_state64_t *)state;
			uintptr_t caller_slot = ((uintptr_t)(regs->isf.rsp)) + sizeof(uint64_t);

			CPU->cpu_dtrace_caller = *(uintptr_t *)caller_slot;
			dtrace_probe(probe->fbtp_id, regs->rdi, regs->rsi, regs->rdx, regs->rcx, regs->r8);
		}

		/* Both paths leave the per-CPU caller slot cleared. */
		CPU->cpu_dtrace_caller = 0;

		return (probe->fbtp_rval);
	}

	return (0);
}
/*
 * True when `regs` is non-NULL and the low two bits of the saved %cs
 * selector (its privilege level) are non-zero, i.e. the trap did not come
 * from ring 0.
 */
#define IS_USER_TRAP(regs) (regs && (((regs)->isf.cs & 3) != 0))
/* Trap number this callback reacts to (x86 invalid-opcode vector). */
#define T_INVALID_OPCODE 6
#define FBT_EXCEPTION_CODE T_INVALID_OPCODE
/* Trap number written back into the saved state before returning. */
#define T_PREEMPT 255
/*
 * fbt_perfCallback -- trap-time hook that emulates the instruction an FBT
 * probe displaced.
 *
 * trapno:      trap number of the fault being delivered.
 * tagged_regs: saved register state; its 64-bit view is obtained with
 *              saved_state64().
 * lo_spp:      in/out pointer to a stack-pointer value; it is read and
 *              advanced when a leave / pop %rbp is emulated.
 *
 * Only acts when trapno == FBT_EXCEPTION_CODE and the trap is not a user
 * trap.  In that case it calls dtrace_invop() with interrupts disabled and
 * emulates the instruction indicated by the returned code.
 *
 * Returns KERN_SUCCESS when an instruction was emulated, KERN_FAILURE
 * otherwise (including when the trap was not handled here at all).
 */
kern_return_t
fbt_perfCallback(
int trapno,
x86_saved_state_t *tagged_regs,
uintptr_t *lo_spp,
__unused int unused2)
{
kern_return_t retval = KERN_FAILURE;
x86_saved_state64_t *saved_state = saved_state64(tagged_regs);
if (FBT_EXCEPTION_CODE == trapno && !IS_USER_TRAP(saved_state)) {
boolean_t oldlevel;
uint64_t rsp_probe, fp, delta = 0;
uintptr_t old_sp;
uint32_t *pDst;
int emul;
/* Run with interrupts off; the previous state is restored before returning. */
oldlevel = ml_set_interrupts_enabled(FALSE);
/* Stack pointer as it was when the patched instruction trapped. */
rsp_probe = saved_state->isf.rsp;
/*
 * Plant a local label at this point in the instruction stream and emit a
 * private_extern data symbol, _dtrace_invop_callsite_pre, holding that
 * label's address.  NOTE(review): presumably consumed elsewhere to
 * recognise the dtrace_invop call site -- confirm.
 */
__asm__ volatile(
"Ldtrace_invop_callsite_pre_label:\n"
".data\n"
".private_extern _dtrace_invop_callsite_pre\n"
"_dtrace_invop_callsite_pre:\n"
" .quad Ldtrace_invop_callsite_pre_label\n"
".text\n"
);
/* Fire the probe; the result names the instruction to emulate. */
emul = dtrace_invop( saved_state->isf.rip, (uintptr_t *)saved_state, saved_state->rax );
/* Same as above, marking the position just after the dtrace_invop call. */
__asm__ volatile(
"Ldtrace_invop_callsite_post_label:\n"
".data\n"
".private_extern _dtrace_invop_callsite_post\n"
"_dtrace_invop_callsite_post:\n"
" .quad Ldtrace_invop_callsite_post_label\n"
".text\n"
);
switch (emul) {
case DTRACE_INVOP_NOP:
/* Nothing to emulate: just step over the patched byte. */
saved_state->isf.rip += DTRACE_INVOP_NOP_SKIP;
retval = KERN_SUCCESS;
break;
case DTRACE_INVOP_MOV_RSP_RBP:
/* Emulate mov %rsp,%rbp using the trap-time stack pointer, then skip it. */
saved_state->rbp = rsp_probe;
saved_state->isf.rip += DTRACE_INVOP_MOV_RSP_RBP_SKIP;
retval = KERN_SUCCESS;
break;
case DTRACE_INVOP_POP_RBP:
case DTRACE_INVOP_LEAVE:
/*
 * Emulate leave (mov %rbp,%rsp; pop %rbp).  pop %rbp shares this path.
 * fp is the frame pointer at the trap; the word it addresses is loaded
 * as the new saved %rbp.
 */
fp = saved_state->rbp;
saved_state->rbp = *(uint64_t *)fp;
/* Step over the one-byte patched instruction. */
saved_state->isf.rip += DTRACE_INVOP_LEAVE_SKIP;
/*
 * delta counts 32-bit words: the distance from the trap-time stack
 * pointer up to fp, plus two words (8 bytes) for the popped %rbp.
 */
delta = ((uint32_t *)fp) - ((uint32_t *)rsp_probe);
delta += 2;
/* delta << 2 converts the word count back to bytes. */
saved_state->isf.rsp += (delta << 2);
old_sp = *lo_spp;
/*
 * Slide the stack contents between old_sp (exclusive) and fp
 * (inclusive) upward by delta words, copying from the top down.
 * NOTE(review): delta is unsigned, so pDst[-delta] relies on modular
 * pointer arithmetic to address the word delta slots below pDst.
 */
for (pDst = (uint32_t *)fp;
pDst > (((uint32_t *)old_sp));
pDst--)
*pDst = pDst[-delta];
/* The saved state was part of what moved; follow it to its new address. */
saved_state = (x86_saved_state64_t *) (((uintptr_t)saved_state) + (delta << 2));
/* Publish the lifted stack pointer to the caller. */
*lo_spp = old_sp + (delta << 2);
retval = KERN_SUCCESS;
break;
default:
/* Unknown or zero code: nothing emulated. */
retval = KERN_FAILURE;
break;
}
/*
 * Overwrite the recorded trap number.  This happens for every handled
 * kernel invalid-opcode trap, including the KERN_FAILURE default case.
 * NOTE(review): presumably changes how the generic trap-return path
 * treats this trap -- confirm against the trap handler.
 */
saved_state->isf.trapno = T_PREEMPT;
ml_set_interrupts_enabled(oldlevel);
}
return retval;
}
/*
 * fbt_provide_probe -- create the FBT entry probe and any return probes for
 * a single function.
 *
 * ctl:         module the function belongs to; recorded in each probe
 *              together with its mod_loadcnt.
 * instrLow:    lowest address of the text range being scanned.
 * instrHigh:   one past the highest address of that range.
 * modname:     module name used when looking up / creating probes.
 * symbolName:  function name; copied into each probe's fbtp_name.
 * symbolStart: address of the function's first instruction.
 *
 * Entry probe: within the first few instructions the function must contain
 * "push %rbp" immediately followed by "mov %rsp,%rbp" (0x48 0x89 0xe5).
 * The patch point is the first byte of that mov.  Functions without this
 * prologue get no probes at all.
 *
 * Return probes: the remainder of the text range is disassembled looking
 * for a one-byte "pop %rbp" or "leave" directly followed by a ret or jmp;
 * each such pop/leave becomes a return-probe patch point.  Scanning stops
 * at the range limit, on a disassembly failure, or at the next "push %rbp"
 * byte.
 *
 * If a probe of the same name already exists, the new fbt_probe_t is
 * appended to its chain and enabled immediately when any existing chain
 * member is currently patched in.
 */
void
fbt_provide_probe(struct modctl *ctl, uintptr_t instrLow, uintptr_t instrHigh, char *modname, char* symbolName, machine_inst_t* symbolStart)
{
	unsigned int j;
	unsigned int doenable = 0;
	dtrace_id_t thisid;
	fbt_probe_t *newfbt, *retfbt, *entryfbt;
	machine_inst_t *instr, *limit, theInstr, i1, i2, i3;
	int size;

	if (!symbolStart || !instrLow || !instrHigh) {
		kprintf("dtrace: %s has an invalid address\n", symbolName);
		return;
	}

	/*
	 * Look at up to four leading instructions for the "push %rbp" that
	 * opens a frame.  A ret ends the search early without a match.
	 */
	for (j = 0, instr = symbolStart, theInstr = 0;
	    (j < 4) && ((uintptr_t)instr >= instrLow) && (instrHigh > (uintptr_t)(instr + 2));
	    j++) {
		theInstr = instr[0];
		if (theInstr == FBT_PUSH_RBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16)
			break;

		if ((size = dtrace_instr_size(instr)) <= 0)
			break;

		instr += size;
	}

	if (theInstr != FBT_PUSH_RBP)
		return;

	limit = (machine_inst_t *)instrHigh;

	/*
	 * The scan above only guarantees that instr[0..2] lie inside the text
	 * range, but the prologue match needs instr[3] as well.  Refuse to
	 * read past the range; a push %rbp that close to the end cannot be
	 * followed by a complete mov %rsp,%rbp anyway.
	 */
	if ((uintptr_t)(instr + 3) >= instrHigh)
		return;

	i1 = instr[1];
	i2 = instr[2];
	i3 = instr[3];

	/* Require "mov %rsp,%rbp" right after the push. */
	if (!(i1 == FBT_REX_RSP_RBP && i2 == FBT_MOV_RSP_RBP0 && i3 == FBT_MOV_RSP_RBP1))
		return;

	instr += 1;	/* Advance to the mov %rsp,%rbp: this is the entry patch point. */
	theInstr = i1;

	thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_ENTRY);
	newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
	strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS );

	if (thisid != 0) {
		/*
		 * The entry probe already exists: append newfbt to the end of
		 * its chain.  If any member of the chain is currently patched
		 * in (current value equals patch value), enable newfbt too.
		 */
		entryfbt = dtrace_probe_arg (fbt_id, thisid);
		ASSERT (entryfbt != NULL);
		for(; entryfbt != NULL; entryfbt = entryfbt->fbtp_next) {
			if (entryfbt->fbtp_currentval == entryfbt->fbtp_patchval)
				doenable++;

			if (entryfbt->fbtp_next == NULL) {
				entryfbt->fbtp_next = newfbt;
				newfbt->fbtp_id = entryfbt->fbtp_id;
				break;
			}
		}
	}
	else {
		/* Brand-new probe: create it; nothing to enable yet. */
		newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname, symbolName, FBT_ENTRY, FBT_AFRAMES_ENTRY, newfbt);
		doenable = 0;
	}

	newfbt->fbtp_patchpoint = instr;
	newfbt->fbtp_ctl = ctl;
	newfbt->fbtp_loadcnt = ctl->mod_loadcnt;
	newfbt->fbtp_rval = DTRACE_INVOP_MOV_RSP_RBP;
	newfbt->fbtp_savedval = theInstr;
	newfbt->fbtp_patchval = FBT_PATCHVAL;
	newfbt->fbtp_currentval = 0;
	/* Push onto the head of the patch-point hash bucket. */
	newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
	fbt_probetab[FBT_ADDR2NDX(instr)] = newfbt;

	if (doenable)
		fbt_enable(NULL, newfbt->fbtp_id, newfbt);

	/*
	 * Return probes.  Find the tail of any existing return-probe chain
	 * and note whether it is currently enabled.
	 */
	doenable=0;

	thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_RETURN);
	if (thisid != 0) {
		retfbt = dtrace_probe_arg (fbt_id, thisid);
		ASSERT(retfbt != NULL);
		for (; retfbt != NULL; retfbt = retfbt->fbtp_next) {
			if (retfbt->fbtp_currentval == retfbt->fbtp_patchval)
				doenable++;
			if(retfbt->fbtp_next == NULL)
				break;
		}
	}
	else {
		doenable = 0;
		retfbt = NULL;
	}

again:
	if (instr >= limit)
		return;

	/* A failed decode means we have left valid code; stop scanning. */
	if ((size = dtrace_instr_size(instr)) <= 0)
		return;

	/*
	 * Jump-table heuristic: treat each pointer-sized window that overlaps
	 * this instruction as a pointer.  If any of them points back into
	 * [symbolStart, limit), assume this is table data rather than code
	 * and skip the instruction.
	 */
	for (j = 0; j < sizeof (uintptr_t); j++) {
		uintptr_t check = (uintptr_t)instr - j;
		uint8_t *ptr;

		if (check < (uintptr_t)symbolStart)
			break;

		if (check + sizeof (uintptr_t) > (uintptr_t)limit)
			continue;

		ptr = *(uint8_t **)check;

		if (ptr >= (uint8_t *)symbolStart && ptr < limit) {
			instr += size;
			goto again;
		}
	}

	theInstr = instr[0];

	/* A push %rbp byte is taken as the start of the next routine. */
	if (theInstr == FBT_PUSH_RBP)
		return;

	/* Keep walking until a one-byte pop %rbp or leave is found. */
	if (!(size == 1 && (theInstr == FBT_POP_RBP || theInstr == FBT_LEAVE))) {
		instr += size;
		goto again;
	}

	/* Candidate return patch point. */
	machine_inst_t *patch_instr = instr;

	/* The very next instruction must be a ret or a jmp. */
	instr += size;
	if (instr >= limit)
		return;

	size = dtrace_instr_size(instr);
	if (size <= 0)
		return;

	theInstr = instr[0];

	if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) &&
	    !(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) &&
	    !(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) &&
	    !(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) &&
	    !(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS)))
		return;

	/* pop %rbp / leave followed by ret or jmp: register a return probe. */
	newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
	strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS );

	if (retfbt == NULL) {
		newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
		    symbolName, FBT_RETURN, FBT_AFRAMES_RETURN, newfbt);
	} else {
		retfbt->fbtp_next = newfbt;
		newfbt->fbtp_id = retfbt->fbtp_id;
	}

	/* newfbt is now the tail that later return sites chain onto. */
	retfbt = newfbt;
	newfbt->fbtp_patchpoint = patch_instr;
	newfbt->fbtp_ctl = ctl;
	newfbt->fbtp_loadcnt = ctl->mod_loadcnt;

	if (*patch_instr == FBT_POP_RBP) {
		newfbt->fbtp_rval = DTRACE_INVOP_POP_RBP;
	} else {
		ASSERT(*patch_instr == FBT_LEAVE);
		newfbt->fbtp_rval = DTRACE_INVOP_LEAVE;
	}
	/* Non-zero offset from the function start marks this as a return probe. */
	newfbt->fbtp_roffset =
	    (uintptr_t)(patch_instr - (uint8_t *)symbolStart);

	newfbt->fbtp_savedval = *patch_instr;
	newfbt->fbtp_patchval = FBT_PATCHVAL;
	newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)];
	fbt_probetab[FBT_ADDR2NDX(patch_instr)] = newfbt;

	if (doenable)
		fbt_enable(NULL, newfbt->fbtp_id, newfbt);

	/* Continue after the ret/jmp: a function may have several return sites. */
	instr += size;
	goto again;
}
void
fbt_provide_module_kernel_syms(struct modctl *ctl)
{
kernel_mach_header_t *mh;
struct load_command *cmd;
kernel_segment_command_t *orig_ts = NULL, *orig_le = NULL;
struct symtab_command *orig_st = NULL;
kernel_nlist_t *sym = NULL;
char *strings;
uintptr_t instrLow, instrHigh;
char *modname;
unsigned int i;
mh = (kernel_mach_header_t *)(ctl->mod_address);
modname = ctl->mod_modname;
if (mh->magic != MH_MAGIC_KERNEL)
return;
cmd = (struct load_command *) &mh[1];
for (i = 0; i < mh->ncmds; i++) {
if (cmd->cmd == LC_SEGMENT_KERNEL) {
kernel_segment_command_t *orig_sg = (kernel_segment_command_t *) cmd;
if (LIT_STRNEQL(orig_sg->segname, SEG_TEXT))
orig_ts = orig_sg;
else if (LIT_STRNEQL(orig_sg->segname, SEG_LINKEDIT))
orig_le = orig_sg;
else if (LIT_STRNEQL(orig_sg->segname, ""))
orig_ts = orig_sg;
}
else if (cmd->cmd == LC_SYMTAB)
orig_st = (struct symtab_command *) cmd;
cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize);
}
if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL))
return;
sym = (kernel_nlist_t *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff);
strings = (char *)(orig_le->vmaddr + orig_st->stroff - orig_le->fileoff);
instrLow = (uintptr_t)orig_ts->vmaddr;
instrHigh = (uintptr_t)(orig_ts->vmaddr + orig_ts->vmsize);
for (i = 0; i < orig_st->nsyms; i++) {
uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT);
char *name = strings + sym[i].n_un.n_strx;
if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type))
continue;
if (0 == sym[i].n_un.n_strx)
continue;
if (*name == '_')
name += 1;
if (MOD_IS_MACH_KERNEL(ctl) && fbt_excluded(name))
continue;
fbt_provide_probe(ctl, instrLow, instrHigh, modname, name, (machine_inst_t*)sym[i].n_value);
}
}