#ifdef KERNEL
#ifndef _KERNEL
#define _KERNEL
#endif
#endif
#define MACH__POSIX_C_SOURCE_PRIVATE 1
#include <kern/thread.h>
#include <mach/thread_status.h>
#include <mach/vm_param.h>
#include <mach-o/loader.h>
#include <mach-o/nlist.h>
extern struct mach_header _mh_execute_header;
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <miscfs/devfs/devfs.h>
#include <sys/dtrace.h>
#include <sys/dtrace_impl.h>
#include <sys/fbt.h>
#include <sys/dtrace_glue.h>
/*
 * Byte counts added to the saved EIP by fbt_perfCallback() after it has
 * emulated the corresponding patched instruction, plus the emulation code
 * that entry probes record in fbtp_rval.
 */
#define DTRACE_INVOP_NOP_SKIP 1
#define DTRACE_INVOP_MOVL_ESP_EBP 10
#define DTRACE_INVOP_MOVL_ESP_EBP_SKIP 2
#define DTRACE_INVOP_LEAVE_SKIP 1
/*
 * x86 opcode bytes the prologue/epilogue scanner in __fbt_provide_module()
 * compares against.
 */
/* pushl %ebp */
#define FBT_PUSHL_EBP 0x55
/* movl %esp,%ebp -- first and second byte of the 0x8b 0xec encoding */
#define FBT_MOVL_ESP_EBP0_V0 0x8b
#define FBT_MOVL_ESP_EBP1_V0 0xec
/* movl %esp,%ebp -- first and second byte of the 0x89 0xe5 encoding */
#define FBT_MOVL_ESP_EBP0_V1 0x89
#define FBT_MOVL_ESP_EBP1_V1 0xe5
/* Not referenced in this section of the file. */
#define FBT_REX_RSP_RBP 0x48
/* popl %ebp */
#define FBT_POPL_EBP 0x5d
/* ret */
#define FBT_RET 0xc3
/* ret $imm16 */
#define FBT_RET_IMM16 0xc2
/* leave */
#define FBT_LEAVE 0xc9
/* jmp rel8 */
#define FBT_JMP_SHORT_REL 0xeb
/* jmp rel32 */
#define FBT_JMP_NEAR_REL 0xe9
/* jmp far absolute */
#define FBT_JMP_FAR_ABS 0xea
/*
 * Instruction lengths, in bytes, that dtrace_instr_size() must report for the
 * instruction following a popl/leave before a return probe is created.
 */
#define FBT_RET_LEN 1
#define FBT_RET_IMM16_LEN 3
#define FBT_JMP_SHORT_REL_LEN 2
#define FBT_JMP_NEAR_REL_LEN 5
#define FBT_JMP_FAR_ABS_LEN 5
/*
 * Value recorded in fbtp_patchval for every probe.
 * NOTE(review): presumably the byte written over the patch point when a probe
 * is enabled; the patching itself is not in this section -- confirm.
 */
#define FBT_PATCHVAL 0xf0
/*
 * Values passed as the fifth argument of dtrace_probe_create() for entry and
 * return probes respectively.
 */
#define FBT_AFRAMES_ENTRY 7
#define FBT_AFRAMES_RETURN 6
/* Probe-name component strings handed to dtrace_probe_create(). */
#define FBT_ENTRY "entry"
#define FBT_RETURN "return"
/*
 * Hash a patch-point address into an fbt_probetab[] index: drop the low four
 * bits of the address and mask the result with fbt_probetab_mask.
 */
#define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)
extern dtrace_provider_id_t fbt_id;
extern fbt_probe_t **fbt_probetab;
extern int fbt_probetab_mask;
/*
 * Function names (leading underscore already removed) on which
 * __fbt_provide_module() refuses to place probes unless the
 * "IgnoreFBTBlacklist" value read via PE_parse_boot_argn() is non-zero.
 *
 * The table is searched with the file-local bsearch() using _cmp(), which
 * orders by strcmp(); entries must therefore stay sorted in strcmp() order.
 */
static const char * critical_blacklist[] =
{
"bcopy_phys",
"console_cpu_alloc",
"console_cpu_free",
"cpu_IA32e_disable",
"cpu_IA32e_enable",
"cpu_control",
"cpu_data_alloc",
"cpu_desc_init",
"cpu_desc_init64",
"cpu_desc_load64",
"cpu_exit_wait",
"cpu_info",
"cpu_info_count",
"cpu_init",
"cpu_interrupt",
"cpu_machine_init",
"cpu_mode_init",
"cpu_processor_alloc",
"cpu_processor_free",
"cpu_signal_handler",
"cpu_sleep",
"cpu_start",
"cpu_subtype",
"cpu_thread_alloc",
"cpu_thread_halt",
"cpu_thread_init",
"cpu_threadtype",
"cpu_to_processor",
"cpu_topology_start",
"cpu_type",
"cpu_window_init",
"cpuid_cpu_display",
"handle_pending_TLB_flushes",
"hw_compare_and_store",
"machine_idle_cstate",
"mca_cpu_alloc",
"mca_cpu_init",
"ml_nofault_copy",
"pmap_cpu_alloc",
"pmap_cpu_free",
"pmap_cpu_high_map_vaddr",
"pmap_cpu_high_shared_remap",
"pmap_cpu_init",
"register_cpu_setup_func",
"unregister_cpu_setup_func"
};
/* Number of entries in critical_blacklist[]. */
#define CRITICAL_BLACKLIST_COUNT (sizeof(critical_blacklist)/sizeof(critical_blacklist[0]))
/*
 * Second table of function names (leading underscore already removed) that
 * __fbt_provide_module() skips unless the "IgnoreFBTBlacklist" value read via
 * PE_parse_boot_argn() is non-zero.
 * NOTE(review): going by the table's name these are routines reachable from
 * DTrace probe context -- confirm against the DTrace implementation.
 *
 * Searched with the file-local bsearch() and _cmp(); entries must stay sorted
 * in strcmp() order (upper-case names sort before lower-case ones).
 */
static const char * probe_ctx_closure[] =
{
"Debugger",
"OSCompareAndSwap",
"absolutetime_to_microtime",
"ast_pending",
"clock_get_calendar_nanotime_nowait",
"copyin",
"copyin_user",
"copyinstr",
"copyout",
"copyoutstr",
"cpu_number",
"current_proc",
"current_processor",
"current_task",
"current_thread",
"debug_enter",
"find_user_regs",
"flush_tlb64",
"get_bsdtask_info",
"get_bsdthread_info",
"hw_atomic_and",
"kauth_cred_get",
"kauth_getgid",
"kauth_getuid",
"kernel_preempt_check",
"mach_absolute_time",
"max_valid_stack_address",
"ml_at_interrupt_context",
"ml_phys_write_byte_64",
"ml_phys_write_half_64",
"ml_phys_write_word_64",
"ml_set_interrupts_enabled",
"panic",
"pmap64_pde",
"pmap64_pdpt",
"pmap_find_phys",
"pmap_get_mapwindow",
"pmap_pde",
"pmap_pte",
"pmap_put_mapwindow",
"pmap_valid_page",
"prf",
"proc_is64bit",
"proc_selfname",
"proc_selfpid",
"psignal_lock",
"rtc_nanotime_load",
"rtc_nanotime_read",
"strlcpy",
"sync_iss_to_iks_unconditionally",
"timer_grab"
};
/* Number of entries in probe_ctx_closure[]. */
#define PROBE_CTX_CLOSURE_COUNT (sizeof(probe_ctx_closure)/sizeof(probe_ctx_closure[0]))
/*
 * Comparator used with bsearch() over the string tables in this file.
 *
 * a: the search key, itself a NUL-terminated string.
 * b: address of a table slot, i.e. a pointer to a string pointer.
 *
 * Returns the strcmp() ordering of the key relative to the slot's string.
 */
static int _cmp(const void *a, const void *b)
{
	const char *key = a;
	const char *const *slot = b;

	return strcmp(key, *slot);
}
/*
 * File-local binary search over a sorted array.
 *
 * key:    value handed to compar as its first argument.
 * base0:  first element of the array.
 * nmemb:  number of elements.
 * size:   size in bytes of one element.
 * compar: returns <0, 0 or >0 as key sorts before, equal to, or after the
 *         element passed as its second argument.
 *
 * Returns a pointer to a matching element, or NULL when none matches
 * (including when nmemb is 0).
 */
static const void * bsearch(
	register const void *key,
	const void *base0,
	size_t nmemb,
	register size_t size,
	register int (*compar)(const void *, const void *))
{
	const char *lo = base0;
	size_t remaining = nmemb;

	while (remaining != 0) {
		const char *mid = lo + (remaining >> 1) * size;
		int order = (*compar)(key, mid);

		if (order == 0)
			return mid;

		if (order > 0) {
			/* Key sorts after mid: continue in the upper part, excluding mid. */
			lo = mid + size;
			remaining--;
		}
		/* Otherwise keep lo and continue in the lower part. */
		remaining >>= 1;
	}

	return (NULL);
}
/*
 * Look up the FBT probe whose patch point is addr and fire it.
 *
 * addr:  address of the instruction that trapped.
 * stack: stack pointer value at the time of the trap.
 * rval:  value reported as the second argument of return probes.
 *
 * Returns the matching probe's fbtp_rval (the emulation code for the
 * displaced instruction), or 0 when no probe in the hash chain is patched
 * at addr.
 */
int
fbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval)
{
	fbt_probe_t *probe;

	for (probe = fbt_probetab[FBT_ADDR2NDX(addr)]; probe != NULL; probe = probe->fbtp_hashnext) {
		if ((uintptr_t)probe->fbtp_patchpoint != addr)
			continue;

		if (probe->fbtp_roffset != 0) {
			/* Return probe: report the offset of the patch point and rval. */
			dtrace_probe(probe->fbtp_id, probe->fbtp_roffset, rval, 0, 0, 0);
		} else {
			/* Entry probe: collect up to five argument words from the stack. */
			uintptr_t args[5] = { 0, 0, 0, 0, 0 };
			uintptr_t *stacktop;
			unsigned int n;

			if (CPU_ON_INTR(CPU))
				stacktop = (uintptr_t *)dtrace_get_cpu_int_stack_top();
			else
				stacktop = (uintptr_t *)(dtrace_get_kernel_stack(current_thread()) + KERNEL_STACK_SIZE);

			/*
			 * Skip one word (entry probes sit right after a pushl %ebp),
			 * record the next word as the caller, then read the
			 * arguments, never reading beyond stacktop.
			 */
			stack += 1;
			if (stack <= stacktop)
				CPU->cpu_dtrace_caller = *stack++;
			for (n = 0; n < 5 && stack <= stacktop; n++)
				args[n] = *stack++;

			dtrace_probe(probe->fbtp_id, args[0], args[1], args[2], args[3], args[4]);
		}

		CPU->cpu_dtrace_caller = 0;
		return (probe->fbtp_rval);
	}

	return (0);
}
/*
 * True when regs is non-NULL and the low two bits of its saved code-segment
 * selector are non-zero, i.e. the trap was not taken at privilege level 0.
 */
#define IS_USER_TRAP(regs) (regs && (((regs)->cs & 3) != 0))
/* x86 invalid-opcode exception vector number. */
#define T_INVALID_OPCODE 6
/* The only trap number fbt_perfCallback() acts on. */
#define FBT_EXCEPTION_CODE T_INVALID_OPCODE
/*
 * Trap callback: for an invalid-opcode trap taken in kernel mode, hand the
 * trap to dtrace_invop() and emulate the instruction it reports by editing
 * the saved register state (and, for popl/leave, the live stack).
 *
 * trapno:      trap number; anything other than FBT_EXCEPTION_CODE is ignored.
 * tagged_regs: saved register state, viewed here through saved_state32().
 * unused1/2:   ignored.
 *
 * Returns KERN_SUCCESS when an instruction was emulated, KERN_FAILURE when
 * the trap is not handled here (other trap number, user-mode trap, or an
 * emulation code this function does not recognise).
 *
 * NOTE(review): the code stores pointers in uint32_t variables, so it is only
 * meaningful when built as 32-bit code.
 */
kern_return_t
fbt_perfCallback(
int trapno,
x86_saved_state_t *tagged_regs,
__unused int unused1,
__unused int unused2)
{
kern_return_t retval = KERN_FAILURE;
x86_saved_state32_t *saved_state = saved_state32(tagged_regs);
struct x86_saved_state32_from_kernel *regs = (struct x86_saved_state32_from_kernel *)saved_state;
if (FBT_EXCEPTION_CODE == trapno && !IS_USER_TRAP(saved_state)) {
boolean_t oldlevel, cpu_64bit;
uint32_t esp_probe, *ebp, edi, fp, *pDst, delta = 0;
int emul;
cpu_64bit = ml_is64bit();
/* Interrupts stay disabled until the previous level is restored below. */
oldlevel = ml_set_interrupts_enabled(FALSE);
/*
 * esp_probe is the stack pointer value at the trapping instruction: taken
 * from the saved uesp when ml_is64bit() is true, otherwise taken to be the
 * address immediately past the kernel save area.
 */
if (cpu_64bit) {
esp_probe = saved_state->uesp;
} else {
esp_probe = (uint32_t)&(regs[1]);
}
emul = dtrace_invop( saved_state->eip, (uintptr_t *)esp_probe, saved_state->eax );
/*
 * Emit a global label directly after the dtrace_invop() call.
 * NOTE(review): presumably so other code can recognise this return address;
 * no consumer is visible in this section -- confirm.
 */
__asm__ volatile(".globl _dtrace_invop_callsite");
__asm__ volatile("_dtrace_invop_callsite:");
switch (emul) {
case DTRACE_INVOP_NOP:
/* Nothing to emulate; just step over the one-byte instruction. */
saved_state->eip += DTRACE_INVOP_NOP_SKIP;
retval = KERN_SUCCESS;
break;
case DTRACE_INVOP_MOVL_ESP_EBP:
/* Emulate movl %esp,%ebp, then step over its two bytes. */
saved_state->ebp = esp_probe;
saved_state->eip += DTRACE_INVOP_MOVL_ESP_EBP_SKIP;
retval = KERN_SUCCESS;
break;
case DTRACE_INVOP_POPL_EBP:
case DTRACE_INVOP_LEAVE:
/*
 * Emulate popl %ebp / leave: fp is the frame pointer at the trap, the word
 * it addresses becomes the new saved %ebp, and the one-byte instruction is
 * stepped over.
 */
fp = saved_state->ebp;
saved_state->ebp = *(uint32_t *)fp;
saved_state->eip += DTRACE_INVOP_LEAVE_SKIP;
/*
 * delta = number of 32-bit words being discarded from the stack: the words
 * between esp_probe and fp, plus one for the popped %ebp slot.
 */
delta = ((uint32_t *)fp) - ((uint32_t *)esp_probe);
delta += 1;
if (cpu_64bit)
saved_state->uesp += (delta << 2);
/*
 * Fetch the word stored immediately below the frame pointer of this
 * function's caller.
 * NOTE(review): going by the variable name this is the %edi value the caller
 * saved; that depends on how the compiler lays out the caller's frame --
 * confirm.
 */
ebp = (uint32_t *)__builtin_frame_address(0);
ebp = (uint32_t *)*ebp;
edi = *(ebp - 1);
/*
 * Slide every word from fp down to (but excluding) the address held in edi
 * up by delta words. delta is unsigned, so pDst[-delta] relies on 32-bit
 * wrap-around to address pDst - delta.
 */
for (pDst = (uint32_t *)fp;
pDst > (((uint32_t *)edi));
pDst--)
*pDst = pDst[-delta];
/* Store the fetched value back, advanced by the same number of bytes. */
*(ebp - 1) = edi + (delta << 2);
retval = KERN_SUCCESS;
break;
default:
retval = KERN_FAILURE;
break;
}
ml_set_interrupts_enabled(oldlevel);
}
return retval;
}
/*
 * Walk the Mach-O symbol table of the module described by ctl and create FBT
 * probes for every eligible external function: one "entry" probe at the
 * movl %esp,%ebp of its prologue, and one "return" probe for each
 * popl %ebp / leave that is immediately followed by a ret or jmp.
 *
 * arg: unused.
 * ctl: module descriptor; address is cast to the module's mach_header, and
 *      mod_modname / mod_loadcnt are recorded in every probe created.
 *
 * Returns nothing. The function returns early without creating probes when
 * the module has no address or size, is the dtrace driver or a CHUD module,
 * is not a 32-bit Mach-O image (MH_MAGIC), or lacks a text segment, a
 * __LINKEDIT segment or a symbol table.
 */
static void
__fbt_provide_module(void *arg, struct modctl *ctl)
{
#pragma unused(arg)
struct mach_header *mh;
struct load_command *cmd;
struct segment_command *orig_ts = NULL, *orig_le = NULL;
struct symtab_command *orig_st = NULL;
struct nlist *sym = NULL;
char *strings;
uintptr_t instrLow, instrHigh;
char *modname;
unsigned int i, j;
/* Stays 0 unless PE_parse_boot_argn() stores a different value for "IgnoreFBTBlacklist". */
int gIgnoreFBTBlacklist = 0;
PE_parse_boot_argn("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist, sizeof (gIgnoreFBTBlacklist));
mh = (struct mach_header *)(ctl->address);
modname = ctl->mod_modname;
/* mh is only dereferenced after this check has established a non-zero address. */
if (0 == ctl->address || 0 == ctl->size)
return;
/* Never instrument the dtrace driver itself or any module whose name contains "CHUD". */
if (strcmp(modname, "com.apple.driver.dtrace") == 0)
return;
if (strstr(modname, "CHUD") != NULL)
return;
if (mh->magic != MH_MAGIC)
return;
/*
 * Walk the load commands that follow the header, remembering the text
 * segment (named SEG_TEXT, or with an empty name), the SEG_LINKEDIT segment
 * and the LC_SYMTAB command.
 */
cmd = (struct load_command *) &mh[1];
for (i = 0; i < mh->ncmds; i++) {
if (cmd->cmd == LC_SEGMENT) {
struct segment_command *orig_sg = (struct segment_command *) cmd;
if (strcmp(SEG_TEXT, orig_sg->segname) == 0)
orig_ts = orig_sg;
else if (strcmp(SEG_LINKEDIT, orig_sg->segname) == 0)
orig_le = orig_sg;
else if (strcmp("", orig_sg->segname) == 0)
orig_ts = orig_sg;
}
else if (cmd->cmd == LC_SYMTAB)
orig_st = (struct symtab_command *) cmd;
cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize);
}
if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL))
return;
/*
 * The symbol array is taken to start at the link-edit segment's vmaddr, with
 * the string table directly after the nsyms nlist entries.
 */
sym = (struct nlist *)orig_le->vmaddr;
strings = ((char *)sym) + orig_st->nsyms * sizeof(struct nlist);
/* [instrLow, instrHigh) is the address range of the text segment. */
instrLow = (uintptr_t)orig_ts->vmaddr;
instrHigh = (uintptr_t)(orig_ts->vmaddr + orig_ts->vmsize);
for (i = 0; i < orig_st->nsyms; i++) {
fbt_probe_t *fbt, *retfbt;
machine_inst_t *instr, *limit, theInstr, i1, i2;
uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT);
char *name = strings + sym[i].n_un.n_strx;
int size;
/* Only external symbols of type N_SECT or N_ABS that have a name are considered. */
if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type))
continue;
if (0 == sym[i].n_un.n_strx)
continue;
/* Drop one leading underscore so the tests below see the bare name. */
if (*name == '_')
name += 1;
/*
 * Name-based exclusions. "strstr(name, X) == name" is a prefix test;
 * strcmp() tests are exact matches.
 */
/* Prefix "dtrace_" is excluded, except for names starting with "dtrace_safe_". */
if (strstr(name, "dtrace_") == name &&
strstr(name, "dtrace_safe_") != name) {
continue;
}
if (strstr(name, "dsmos_") == name)
continue;
if (strstr(name, "dtxnu_") == name ||
strstr(name, "_dtrace") == name)
continue;
if (strstr(name, "chud") == name)
continue;
if (strstr(name, "hibernate_") == name)
continue;
if (0 == strcmp(name, "ZN9IOService14newTemperatureElPS_") ||
0 == strcmp(name, "ZN9IOService26temperatureCriticalForZoneEPS_"))
continue;
/* NOTE(review): judging by the names, these are trap/exception-path routines -- confirm. */
if (0 == strcmp(name, "t_invop") ||
0 == strcmp(name, "enter_lohandler") ||
0 == strcmp(name, "lo_alltraps") ||
0 == strcmp(name, "kernel_trap") ||
0 == strcmp(name, "i386_astintr"))
continue;
/* Includes routines called by fbt_invop() and fbt_perfCallback() in this file. */
if (0 == strcmp(name, "current_thread") ||
0 == strcmp(name, "ast_pending") ||
0 == strcmp(name, "fbt_perfCallback") ||
0 == strcmp(name, "machine_thread_get_kern_state") ||
0 == strcmp(name, "ml_set_interrupts_enabled") ||
0 == strcmp(name, "dtrace_invop") ||
0 == strcmp(name, "fbt_invop") ||
0 == strcmp(name, "sdt_invop") ||
0 == strcmp(name, "max_valid_stack_address"))
continue;
if (strstr(name, "machine_stack_") == name ||
strstr(name, "mapping_") == name ||
0 == strcmp(name, "tmrCvt") ||
strstr(name, "tsc_") == name ||
strstr(name, "pmCPU") == name ||
0 == strcmp(name, "Cstate_table_set") ||
0 == strcmp(name, "pmKextRegister") ||
0 == strcmp(name, "pmSafeMode") ||
0 == strcmp(name, "pmUnregister") ||
strstr(name, "pms") == name ||
0 == strcmp(name, "power_management_init") ||
strstr(name, "usimple_") == name ||
strstr(name, "rtc_") == name ||
strstr(name, "_rtc_") == name ||
strstr(name, "rtclock_") == name ||
strstr(name, "clock_") == name ||
strstr(name, "absolutetime_to_") == name ||
0 == strcmp(name, "setPop") ||
0 == strcmp(name, "nanoseconds_to_absolutetime") ||
0 == strcmp(name, "nanotime_to_absolutetime") ||
strstr(name, "etimer_") == name ||
strstr(name, "commpage_") == name ||
strstr(name, "pmap_") == name ||
strstr(name, "ml_") == name ||
strstr(name, "PE_") == name ||
strstr(name, "lapic_") == name ||
strstr(name, "acpi_") == name)
continue;
if (strstr(name, "machine_") == name)
continue;
if (0 == strcmp(name, "handle_pending_TLB_flushes"))
continue;
/*
 * Sorted-table exclusions, bypassed when gIgnoreFBTBlacklist is non-zero.
 * NOTE(review): sizeof(name) is the size of a char pointer, which matches
 * the tables' element size; sizeof(critical_blacklist[0]) would state the
 * intent directly.
 */
if (!gIgnoreFBTBlacklist &&
bsearch( name, critical_blacklist, CRITICAL_BLACKLIST_COUNT, sizeof(name), _cmp ) != NULL)
continue;
if (!gIgnoreFBTBlacklist &&
bsearch( name, probe_ctx_closure, PROBE_CTX_CLOSURE_COUNT, sizeof(name), _cmp ) != NULL)
continue;
/* NOTE(review): judging by the names, debugger-path routines -- confirm. */
if (strstr(name, "kdp_") == name ||
strstr(name, "kdb_") == name ||
strstr(name, "kdbg_") == name ||
strstr(name, "kdebug_") == name ||
0 == strcmp(name, "kernel_debug") ||
0 == strcmp(name, "Debugger") ||
0 == strcmp(name, "Call_DebuggerC") ||
0 == strcmp(name, "lock_debugger") ||
0 == strcmp(name, "unlock_debugger") ||
0 == strcmp(name, "SysChoked"))
continue;
/* Unlike the prefix tests above, "panic_" is matched anywhere in the name. */
if (NULL != strstr(name, "panic_") ||
0 == strcmp(name, "panic") ||
0 == strcmp(name, "handleMck") ||
0 == strcmp(name, "unresolved_kernel_trap"))
continue;
/* Skip functions for which this provider already has a probe in this module. */
if (dtrace_probe_lookup(fbt_id, modname, name, NULL) != 0)
continue;
/*
 * Examine at most four instructions from the symbol's address, staying
 * inside the text segment with at least two bytes to spare, stopping at the
 * first pushl %ebp, ret or ret $imm16, or when decoding fails.
 */
for (j = 0, instr = (machine_inst_t *)sym[i].n_value, theInstr = 0;
(j < 4) && ((uintptr_t)instr >= instrLow) && (instrHigh > (uintptr_t)(instr + 2));
j++) {
theInstr = instr[0];
if (theInstr == FBT_PUSHL_EBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16)
break;
if ((size = dtrace_instr_size(instr)) <= 0)
break;
instr += size;
}
/* No pushl %ebp found: this function gets no probes. */
if (theInstr != FBT_PUSHL_EBP)
continue;
i1 = instr[1];
i2 = instr[2];
limit = (machine_inst_t *)instrHigh;
if ((i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) ||
(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1)) {
/* movl %esp,%ebp directly follows the push: step onto it. */
instr += 1;
theInstr = i1;
} else {
/*
 * Otherwise tolerate exactly one intervening instruction: step past the
 * push and the next instruction, then require movl %esp,%ebp there.
 */
instr += 1;
if ((size = dtrace_instr_size(instr)) <= 0)
continue;
instr += size;
if ((instr + 1) >= limit)
continue;
i1 = instr[0];
i2 = instr[1];
if (!(i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) &&
!(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1))
continue;
theInstr = i1;
}
/*
 * Create the entry probe, patched at the movl %esp,%ebp, and push it onto
 * the head of its fbt_probetab[] hash chain.
 */
fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS );
fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, name, FBT_ENTRY, FBT_AFRAMES_ENTRY, fbt);
fbt->fbtp_patchpoint = instr;
fbt->fbtp_ctl = ctl;
fbt->fbtp_loadcnt = ctl->mod_loadcnt;
fbt->fbtp_rval = DTRACE_INVOP_MOVL_ESP_EBP;
fbt->fbtp_savedval = theInstr;
fbt->fbtp_patchval = FBT_PATCHVAL;
fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
fbt->fbtp_symndx = i;
fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
retfbt = NULL;
/*
 * Return-probe scan. Each pass through "again" examines the instruction at
 * instr. A "continue" outside the inner j-loop abandons the scan and moves
 * on to the next symbol.
 */
again:
if (instr >= limit)
continue;
if ((size = dtrace_instr_size(instr)) <= 0)
continue;
/*
 * Heuristic against treating embedded pointer data as code: for each
 * pointer-sized word that starts at instr or up to sizeof(uintptr_t)-1
 * bytes before it (but not before the function start), read it as a
 * pointer; if it points into [function start, limit), skip this
 * instruction. The "continue" inside this loop only advances j.
 */
for (j = 0; j < sizeof (uintptr_t); j++) {
uintptr_t check = (uintptr_t)instr - j;
uint8_t *ptr;
if (check < sym[i].n_value)
break;
if (check + sizeof (uintptr_t) > (uintptr_t)limit)
continue;
ptr = *(uint8_t **)check;
if (ptr >= (uint8_t *)sym[i].n_value && ptr < limit) {
instr += size;
goto again;
}
}
theInstr = instr[0];
/* A pushl %ebp here ends the scan for this symbol. */
if (theInstr == FBT_PUSHL_EBP)
continue;
/* Keep scanning until a one-byte popl %ebp or leave is found. */
if (!(size == 1 && (theInstr == FBT_POPL_EBP || theInstr == FBT_LEAVE))) {
instr += size;
goto again;
}
/* Candidate patch point; it is used only if the next instruction is a ret or jmp. */
machine_inst_t *patch_instr = instr;
instr += size;
if (instr >= limit)
continue;
size = dtrace_instr_size(instr);
if (size <= 0)
continue;
theInstr = instr[0];
/* Both the opcode byte and the decoded length must match one of the accepted forms. */
if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) &&
!(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) &&
!(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) &&
!(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) &&
!(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS)))
continue;
/*
 * Create a return probe record. The first one for this function creates the
 * DTrace probe; later ones share its id and are linked through fbtp_next.
 */
fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS );
if (retfbt == NULL) {
fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
name, FBT_RETURN, FBT_AFRAMES_RETURN, fbt);
} else {
retfbt->fbtp_next = fbt;
fbt->fbtp_id = retfbt->fbtp_id;
}
retfbt = fbt;
fbt->fbtp_patchpoint = patch_instr;
fbt->fbtp_ctl = ctl;
fbt->fbtp_loadcnt = ctl->mod_loadcnt;
if (*patch_instr == FBT_POPL_EBP) {
fbt->fbtp_rval = DTRACE_INVOP_POPL_EBP;
} else {
ASSERT(*patch_instr == FBT_LEAVE);
fbt->fbtp_rval = DTRACE_INVOP_LEAVE;
}
/* Offset of the patch point from the function start; non-zero marks a return probe in fbt_invop(). */
fbt->fbtp_roffset =
(uintptr_t)(patch_instr - (uint8_t *)sym[i].n_value);
fbt->fbtp_savedval = *patch_instr;
fbt->fbtp_patchval = FBT_PATCHVAL;
fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)];
fbt->fbtp_symndx = i;
fbt_probetab[FBT_ADDR2NDX(patch_instr)] = fbt;
/* Continue after the ret/jmp to look for further return sites. */
instr += size;
goto again;
}
}
extern struct modctl g_fbt_kernctl;
#undef kmem_alloc
#undef kmem_free
#include <vm/vm_kern.h>
/*
 * Provider entry point: create FBT probes for the image described by
 * g_fbt_kernctl, then release that image's memory and clear the descriptor.
 *
 * arg: passed through to __fbt_provide_module().
 * ctl: ignored; g_fbt_kernctl is always the module scanned.
 *
 * The descriptor is zeroed after the first call, so later calls find
 * address == 0: __fbt_provide_module() returns immediately in that case and
 * there is nothing left to free.
 */
void
fbt_provide_module(void *arg, struct modctl *ctl)
{
#pragma unused(ctl)
	__fbt_provide_module(arg, &g_fbt_kernctl);

	/*
	 * Only hand the region back when there is one; without this guard every
	 * call after the first would pass address 0 to kmem_free().
	 */
	if (0 != g_fbt_kernctl.address)
		kmem_free(kernel_map, (vm_offset_t)g_fbt_kernctl.address, round_page_32(g_fbt_kernctl.size));

	g_fbt_kernctl.address = 0;
	g_fbt_kernctl.size = 0;
}