#include <kern/debug.h>
#include <libkern/libkern.h>
#include <pexpert/pexpert.h>
#include <sys/param.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/kauth.h>
#include <kperf/action.h>
#include <kperf/context.h>
#include <kperf/kdebug_trigger.h>
#include <kperf/kperf.h>
#include <kperf/kperfbsd.h>
#include <kperf/kperf_timer.h>
#include <kperf/pet.h>
#include <sys/ktrace.h>
/*
 * Request identifiers.
 *
 * Each SYSCTL_PROC in this file passes one of these values as its arg1, and
 * the handlers (kperf_sysctl and kperf_sysctl_bless_handler) compare arg1
 * against them to pick the operation to perform.  REQ_BLESS is only
 * recognized by kperf_sysctl_bless_handler; every other value is dispatched
 * by kperf_sysctl.
 *
 * The values 12 and 17 are not assigned in this list.
 */
#define REQ_SAMPLING (1)
#define REQ_ACTION_COUNT (2)
#define REQ_ACTION_SAMPLERS (3)
#define REQ_TIMER_COUNT (4)
#define REQ_TIMER_PERIOD (5)
#define REQ_TIMER_PET (6)
#define REQ_TIMER_ACTION (7)
#define REQ_BLESS (8)
#define REQ_ACTION_USERDATA (9)
#define REQ_ACTION_FILTER_BY_TASK (10)
#define REQ_ACTION_FILTER_BY_PID (11)
#define REQ_PET_IDLE_RATE (13)
#define REQ_BLESS_PREEMPT (14)
#define REQ_KDBG_CSWITCH (15)
#define REQ_RESET (16)
#define REQ_ACTION_UCALLSTACK_DEPTH (18)
#define REQ_ACTION_KCALLSTACK_DEPTH (19)
#define REQ_LIGHTWEIGHT_PET (20)
#define REQ_KDEBUG_ACTION (21)
#define REQ_KDEBUG_FILTER (22)
/* Debug level; exposed read-write through the `debug_level` sysctl below. */
int kperf_debug_level = 0;
#if DEVELOPMENT || DEBUG
/*
 * Counter exposed read-only through the `already_pending_ipis` sysctl on
 * DEVELOPMENT and DEBUG builds.  It is only defined and exported here; the
 * code that updates it lives outside this file.
 */
_Atomic long long kperf_pending_ipis = 0;
#endif
/*
 * Service a sysctl request for a single uint32_t setting.
 *
 * req: the sysctl request being handled
 * get: returns the current value of the setting
 * set: applies a new value and returns an error code
 *
 * `get` is only consulted when the request carries an old (output) buffer.
 * `set` is only called when sysctl_io_number succeeded and the request
 * carries a new (input) buffer; its return value is then the result.
 */
static int
kperf_sysctl_get_set_uint32(struct sysctl_req *req,
    uint32_t (*get)(void), int (*set)(uint32_t))
{
	assert(req != NULL);
	assert(get != NULL);
	assert(set != NULL);

	uint32_t current = req->oldptr ? get() : 0;

	/* `current` doubles as the storage handed to sysctl_io_number */
	int err = sysctl_io_number(req, current, sizeof(current), &current, NULL);
	if (err != 0) {
		return err;
	}
	if (!req->newptr) {
		return 0;
	}

	return set(current);
}
/*
 * Service a sysctl request for a single int setting.
 *
 * req: the sysctl request being handled
 * get: returns the current value of the setting
 * set: applies a new value and returns an error code
 *
 * `get` is only consulted when the request carries an old (output) buffer.
 * `set` is only called when sysctl_io_number succeeded and the request
 * carries a new (input) buffer; its return value is then the result.
 */
static int
kperf_sysctl_get_set_int(struct sysctl_req *req,
    int (*get)(void), int (*set)(int))
{
	assert(req != NULL);
	assert(get != NULL);
	assert(set != NULL);

	int current = req->oldptr ? get() : 0;

	/* `current` doubles as the storage handed to sysctl_io_number */
	int err = sysctl_io_number(req, current, sizeof(current), &current, NULL);
	if (err != 0) {
		return err;
	}
	if (!req->newptr) {
		return 0;
	}

	return set(current);
}
/*
 * Get or set a 32-bit value that is addressed by an unsigned ID.
 *
 * req: the sysctl request being handled
 * get: fetches the value for an ID into its second argument
 * set: stores a value for an ID
 *
 * The request's new buffer carries two 64-bit words: the ID, followed by the
 * value to store.  When the request also has an old buffer it is treated as
 * a read: the value is fetched with `get` and both words are copied back
 * out.  Otherwise the value is stored with `set`.
 *
 * Returns 0 on success, EFAULT when the request has no new buffer, or the
 * error reported by the copy routines, `get` or `set`.
 */
static int
kperf_sysctl_get_set_unsigned_uint32(struct sysctl_req *req,
    int (*get)(unsigned int, uint32_t *), int (*set)(unsigned int, uint32_t))
{
	assert(req != NULL);
	assert(get != NULL);
	assert(set != NULL);

	/*
	 * The ID is always passed in through the new buffer, reads included.
	 * Without it there is nothing to look up, and a request carrying no
	 * buffers at all must not be able to reach `set`.
	 */
	if (req->newptr == USER_ADDR_NULL) {
		return EFAULT;
	}

	/*
	 * Zero-initialized so that a short or absent copy-in can never let
	 * stale stack bytes reach `get`, `set` or the copy-out below.
	 */
	uint64_t inputs[2] = { 0 };

	int error = SYSCTL_IN(req, inputs, sizeof(inputs));
	if (error) {
		return error;
	}

	unsigned int action_id = (unsigned int)inputs[0];
	uint32_t new_value = (uint32_t)inputs[1];

	if (req->oldptr == USER_ADDR_NULL) {
		return set(action_id, new_value);
	}

	uint32_t value_out = 0;
	if ((error = get(action_id, &value_out))) {
		return error;
	}
	inputs[1] = value_out;

	return SYSCTL_OUT(req, inputs, sizeof(inputs));
}
static int
sysctl_timer_period(struct sysctl_req *req)
{
assert(req != NULL);
int error;
uint64_t inputs[2];
if ((error = SYSCTL_IN(req, inputs, sizeof(inputs)))) {
return error;
}
unsigned int timer = (unsigned int)inputs[0];
uint64_t new_period = inputs[1];
if (req->oldptr != USER_ADDR_NULL) {
uint64_t period_out = 0;
if ((error = kperf_timer_get_period(timer, &period_out))) {
return error;
}
inputs[1] = period_out;
} else {
if ((error = kperf_timer_set_period(timer, new_period))) {
return error;
}
}
return SYSCTL_OUT(req, inputs, sizeof(inputs));
}
/*
 * Get or set the filter of an action.
 *
 * req:       the sysctl request being handled
 * is_task_t: when true, the incoming filter value is a Mach port name that
 *            is converted with kperf_port_to_pid before being stored;
 *            when false, it is stored as given
 *
 * The request's new buffer carries two 64-bit words: the action ID, followed
 * by the filter value.  When the request also has an old buffer the filter
 * is fetched with kperf_action_get_filter and placed in the second word;
 * otherwise it is stored with kperf_action_set_filter.  In both cases the
 * two words are then passed to SYSCTL_OUT.
 *
 * Returns 0 on success, EFAULT when the request has no new buffer, or the
 * error reported by the copy routines or the filter accessors.
 */
static int
sysctl_action_filter(struct sysctl_req *req, boolean_t is_task_t)
{
	assert(req != NULL);

	/*
	 * The action ID always arrives through the new buffer.  Refuse the
	 * request without it, so that a request carrying no buffers at all
	 * cannot change a filter.
	 */
	if (req->newptr == USER_ADDR_NULL) {
		return EFAULT;
	}

	/*
	 * Zero-initialized so that a short or absent copy-in can never let
	 * stale stack bytes be used as an action ID or copied back out.
	 */
	uint64_t inputs[2] = { 0 };

	int error = SYSCTL_IN(req, inputs, sizeof(inputs));
	if (error) {
		return error;
	}

	unsigned int actionid = (unsigned int)inputs[0];
	int new_filter = (int)inputs[1];

	if (req->oldptr != USER_ADDR_NULL) {
		int filter_out = 0;
		if ((error = kperf_action_get_filter(actionid, &filter_out))) {
			return error;
		}
		inputs[1] = filter_out;
	} else {
		int pid = is_task_t ? kperf_port_to_pid((mach_port_name_t)new_filter)
		    : new_filter;
		if ((error = kperf_action_set_filter(actionid, pid))) {
			return error;
		}
	}

	return SYSCTL_OUT(req, inputs, sizeof(inputs));
}
/*
 * Get or set the pid that owns ktrace.
 *
 * The current owner is read with ktrace_get_owning_pid and handed to
 * sysctl_io_number.  When that succeeds and the request carries a new
 * buffer, the resulting value is applied with ktrace_set_owning_pid.
 */
static int
sysctl_bless(struct sysctl_req *req)
{
	int owner_pid = ktrace_get_owning_pid();

	int err = sysctl_io_number(req, owner_pid, sizeof(owner_pid), &owner_pid,
	    NULL);
	if (err == 0 && req->newptr) {
		err = ktrace_set_owning_pid(owner_pid);
	}

	return err;
}
static int
sysctl_action_samplers(struct sysctl_req *req)
{
return kperf_sysctl_get_set_unsigned_uint32(req,
kperf_action_get_samplers, kperf_action_set_samplers);
}
static int
sysctl_action_userdata(struct sysctl_req *req)
{
return kperf_sysctl_get_set_unsigned_uint32(req,
kperf_action_get_userdata, kperf_action_set_userdata);
}
static int
sysctl_action_ucallstack_depth(struct sysctl_req *req)
{
return kperf_sysctl_get_set_unsigned_uint32(req,
kperf_action_get_ucallstack_depth, kperf_action_set_ucallstack_depth);
}
static int
sysctl_action_kcallstack_depth(struct sysctl_req *req)
{
return kperf_sysctl_get_set_unsigned_uint32(req,
kperf_action_get_kcallstack_depth, kperf_action_set_kcallstack_depth);
}
/*
 * Get or set the kdebug action through the int get/set helper, backed by
 * kperf_kdebug_get_action and kperf_kdebug_set_action.
 */
static int
sysctl_kdebug_action(struct sysctl_req *req)
{
	const int status = kperf_sysctl_get_set_int(req,
	    kperf_kdebug_get_action, kperf_kdebug_set_action);

	return status;
}
/*
 * Get or set the kdebug filter.
 *
 * When the request has an old buffer, the current filter is obtained from
 * kperf_kdebug_get_filter and copied out; EINVAL is returned if that filter
 * holds no debugids.  Otherwise, when the request has a new buffer, the
 * buffer and its length are handed to kperf_kdebug_set_filter.
 *
 * A request with neither buffer is rejected with EINVAL.  The caller,
 * kperf_sysctl, only performs the read check for such a request, so it must
 * never be allowed to reach the setter.
 */
static int
sysctl_kdebug_filter(struct sysctl_req *req)
{
	assert(req != NULL);

	if (req->oldptr != USER_ADDR_NULL) {
		struct kperf_kdebug_filter *filter = NULL;
		uint32_t n_debugids = kperf_kdebug_get_filter(&filter);
		size_t filter_size = KPERF_KDEBUG_FILTER_SIZE(n_debugids);

		if (n_debugids == 0) {
			return EINVAL;
		}

		return SYSCTL_OUT(req, filter, filter_size);
	}

	if (req->newptr == USER_ADDR_NULL) {
		return EINVAL;
	}

	return kperf_kdebug_set_filter(req->newptr, (uint32_t)req->newlen);
}
/*
 * Setter adapter for the sampling sysctl: a non-zero value enables
 * sampling, zero disables it.  Returns the result of the call made.
 */
static int
kperf_sampling_set(uint32_t sample_start)
{
	return (sample_start != 0) ? kperf_sampling_enable()
	       : kperf_sampling_disable();
}
/*
 * Get the sampling status (kperf_sampling_status) or change it
 * (kperf_sampling_set) through the uint32_t get/set helper.
 */
static int
sysctl_sampling(struct sysctl_req *req)
{
	const int status = kperf_sysctl_get_set_uint32(req,
	    kperf_sampling_status, kperf_sampling_set);

	return status;
}
/*
 * Get or set the number of actions through the uint32_t get/set helper,
 * backed by kperf_action_get_count and kperf_action_set_count.
 */
static int
sysctl_action_count(struct sysctl_req *req)
{
	const int status = kperf_sysctl_get_set_uint32(req,
	    kperf_action_get_count, kperf_action_set_count);

	return status;
}
/*
 * Get or set the number of timers through the uint32_t get/set helper,
 * backed by kperf_timer_get_count and kperf_timer_set_count.
 */
static int
sysctl_timer_count(struct sysctl_req *req)
{
	const int status = kperf_sysctl_get_set_uint32(req,
	    kperf_timer_get_count, kperf_timer_set_count);

	return status;
}
static int
sysctl_timer_action(struct sysctl_req *req)
{
return kperf_sysctl_get_set_unsigned_uint32(req, kperf_timer_get_action,
kperf_timer_set_action);
}
/*
 * Get or set the PET timer ID through the uint32_t get/set helper, backed
 * by kperf_timer_get_petid and kperf_timer_set_petid.
 */
static int
sysctl_timer_pet(struct sysctl_req *req)
{
	const int status = kperf_sysctl_get_set_uint32(req,
	    kperf_timer_get_petid, kperf_timer_set_petid);

	return status;
}
/*
 * Get or set ktrace_root_set_owner_allowed.  The variable is handed
 * directly to sysctl_io_number, both as the value and as the storage.
 */
static int
sysctl_bless_preempt(struct sysctl_req *req)
{
	const int status = sysctl_io_number(req,
	    ktrace_root_set_owner_allowed,
	    sizeof(ktrace_root_set_owner_allowed),
	    &ktrace_root_set_owner_allowed,
	    NULL);

	return status;
}
/*
 * Reset kperf on request.
 *
 * An int that starts at zero is passed through sysctl_io_number; if it is
 * non-zero afterwards, ktrace_reset(KTRACE_KPERF) is called.  Returns the
 * sysctl_io_number error, or 0.
 */
static int
sysctl_kperf_reset(struct sysctl_req *req)
{
	int reset_requested = 0;

	int err = sysctl_io_number(req, reset_requested, sizeof(reset_requested),
	    &reset_requested, NULL);
	if (err == 0 && reset_requested != 0) {
		ktrace_reset(KTRACE_KPERF);
	}

	return err;
}
/*
 * Get or set the PET idle rate through the int get/set helper, backed by
 * kperf_get_pet_idle_rate and kperf_set_pet_idle_rate.
 */
static int
sysctl_pet_idle_rate(struct sysctl_req *req)
{
	const int status = kperf_sysctl_get_set_int(req,
	    kperf_get_pet_idle_rate, kperf_set_pet_idle_rate);

	return status;
}
/*
 * Get or set lightweight PET mode through the int get/set helper, backed
 * by kperf_get_lightweight_pet and kperf_set_lightweight_pet.
 */
static int
sysctl_lightweight_pet(struct sysctl_req *req)
{
	const int status = kperf_sysctl_get_set_int(req,
	    kperf_get_lightweight_pet, kperf_set_lightweight_pet);

	return status;
}
/*
 * Get or set the kdebug context-switch setting through the int get/set
 * helper, backed by kperf_kdbg_cswitch_get and kperf_kdbg_cswitch_set.
 */
static int
sysctl_kdbg_cswitch(struct sysctl_req *req)
{
	const int status = kperf_sysctl_get_set_int(req,
	    kperf_kdbg_cswitch_get, kperf_kdbg_cswitch_set);

	return status;
}
/*
 * Common handler for the kperf sysctls.
 *
 * arg1 carries the REQ_* identifier of the operation; oidp and arg2 are
 * unused.  The whole request is served with ktrace_lock held.
 *
 * A request that supplies new data and no old buffer must pass
 * ktrace_configure(KTRACE_KPERF); every other request must pass
 * ktrace_read_check().  If the check fails, its error is returned and
 * nothing is dispatched.  Unknown identifiers yield ENOENT.
 */
static int
kperf_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
	const uintptr_t request = (uintptr_t)arg1;
	int err;

	lck_mtx_lock(ktrace_lock);

	if (req->newptr != USER_ADDR_NULL && req->oldptr == USER_ADDR_NULL) {
		err = ktrace_configure(KTRACE_KPERF);
	} else {
		err = ktrace_read_check();
	}
	if (err) {
		goto out;
	}

	/* cases are listed in REQ_* numeric order */
	switch (request) {
	case REQ_SAMPLING:
		err = sysctl_sampling(req);
		break;
	case REQ_ACTION_COUNT:
		err = sysctl_action_count(req);
		break;
	case REQ_ACTION_SAMPLERS:
		err = sysctl_action_samplers(req);
		break;
	case REQ_TIMER_COUNT:
		err = sysctl_timer_count(req);
		break;
	case REQ_TIMER_PERIOD:
		err = sysctl_timer_period(req);
		break;
	case REQ_TIMER_PET:
		err = sysctl_timer_pet(req);
		break;
	case REQ_TIMER_ACTION:
		err = sysctl_timer_action(req);
		break;
	case REQ_ACTION_USERDATA:
		err = sysctl_action_userdata(req);
		break;
	case REQ_ACTION_FILTER_BY_TASK:
		err = sysctl_action_filter(req, TRUE);
		break;
	case REQ_ACTION_FILTER_BY_PID:
		err = sysctl_action_filter(req, FALSE);
		break;
	case REQ_PET_IDLE_RATE:
		err = sysctl_pet_idle_rate(req);
		break;
	case REQ_BLESS_PREEMPT:
		err = sysctl_bless_preempt(req);
		break;
	case REQ_KDBG_CSWITCH:
		err = sysctl_kdbg_cswitch(req);
		break;
	case REQ_RESET:
		err = sysctl_kperf_reset(req);
		break;
	case REQ_ACTION_UCALLSTACK_DEPTH:
		err = sysctl_action_ucallstack_depth(req);
		break;
	case REQ_ACTION_KCALLSTACK_DEPTH:
		err = sysctl_action_kcallstack_depth(req);
		break;
	case REQ_LIGHTWEIGHT_PET:
		err = sysctl_lightweight_pet(req);
		break;
	case REQ_KDEBUG_ACTION:
		err = sysctl_kdebug_action(req);
		break;
	case REQ_KDEBUG_FILTER:
		err = sysctl_kdebug_filter(req);
		break;
	default:
		err = ENOENT;
		break;
	}

out:
	lck_mtx_unlock(ktrace_lock);
	return err;
}
/*
 * Handler for the blessed-pid sysctl.
 *
 * arg1 must be REQ_BLESS, otherwise ENOENT is returned; oidp and arg2 are
 * unused.  The whole request is served with ktrace_lock held.
 *
 * Requests without new data must pass ktrace_read_check().  Requests with
 * new data must pass ktrace_configure(KTRACE_KPERF), except when the caller
 * is the superuser and either ktrace_root_set_owner_allowed or
 * ktrace_keep_ownership_on_reset is set, in which case the check is skipped.
 */
static int
kperf_sysctl_bless_handler SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
	int err = 0;

	lck_mtx_lock(ktrace_lock);

	if (req->newptr == USER_ADDR_NULL) {
		err = ktrace_read_check();
	} else {
		/* credentials are only inspected when one of the flags is set */
		const int root_override =
		    (ktrace_root_set_owner_allowed ||
		    ktrace_keep_ownership_on_reset) &&
		    kauth_cred_issuser(kauth_cred_get());

		if (!root_override) {
			err = ktrace_configure(KTRACE_KPERF);
		}
	}
	if (err) {
		goto out;
	}

	err = ((uintptr_t)arg1 == REQ_BLESS) ? sysctl_bless(req) : ENOENT;

out:
	lck_mtx_unlock(ktrace_lock);
	return err;
}
/* Root node that the kperf sysctls in this file are attached to. */
SYSCTL_NODE(, OID_AUTO, kperf, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
"kperf");
/*
 * Action sysctls.  All of them are served by kperf_sysctl, which selects the
 * operation from arg1 (a REQ_* value) and ignores arg2.
 */
SYSCTL_NODE(_kperf, OID_AUTO, action, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
"action");
/* Plain integer: the number of actions. */
SYSCTL_PROC(_kperf_action, OID_AUTO, count,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
(void *)REQ_ACTION_COUNT,
sizeof(int), kperf_sysctl, "I", "Number of actions");
/*
 * The entries below are handled by routines that exchange two uint64_t
 * words (an action ID followed by a value).
 * NOTE(review): the arg2 sizes declared here (3 * sizeof(uint64_t), or
 * sizeof(int) with an "I" format for the callstack depths) do not match
 * that two-word layout; arg2 is unused by kperf_sysctl -- confirm whether
 * anything else relies on these values.
 */
SYSCTL_PROC(_kperf_action, OID_AUTO, samplers,
CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED,
(void *)REQ_ACTION_SAMPLERS,
3 * sizeof(uint64_t), kperf_sysctl, "UQ",
"What to sample when a trigger fires an action");
SYSCTL_PROC(_kperf_action, OID_AUTO, userdata,
CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED,
(void *)REQ_ACTION_USERDATA,
3 * sizeof(uint64_t), kperf_sysctl, "UQ",
"User data to attribute to action");
/* The filter value is a Mach port name that the handler converts to a pid. */
SYSCTL_PROC(_kperf_action, OID_AUTO, filter_by_task,
CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED,
(void *)REQ_ACTION_FILTER_BY_TASK,
3 * sizeof(uint64_t), kperf_sysctl, "UQ",
"Apply a task filter to the action");
/* The filter value is used as a pid directly. */
SYSCTL_PROC(_kperf_action, OID_AUTO, filter_by_pid,
CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED,
(void *)REQ_ACTION_FILTER_BY_PID,
3 * sizeof(uint64_t), kperf_sysctl, "UQ",
"Apply a pid filter to the action");
SYSCTL_PROC(_kperf_action, OID_AUTO, ucallstack_depth,
CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED,
(void *)REQ_ACTION_UCALLSTACK_DEPTH,
sizeof(int), kperf_sysctl, "I",
"Maximum number of frames to include in user callstacks");
SYSCTL_PROC(_kperf_action, OID_AUTO, kcallstack_depth,
CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED,
(void *)REQ_ACTION_KCALLSTACK_DEPTH,
sizeof(int), kperf_sysctl, "I",
"Maximum number of frames to include in kernel callstacks");
/*
 * Timer sysctls.  All of them are served by kperf_sysctl, which selects the
 * operation from arg1 (a REQ_* value) and ignores arg2.
 */
SYSCTL_NODE(_kperf, OID_AUTO, timer, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
"timer");
/* Plain integer: the number of timers. */
SYSCTL_PROC(_kperf_timer, OID_AUTO, count,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
(void *)REQ_TIMER_COUNT,
sizeof(int), kperf_sysctl, "I", "Number of time triggers");
/* Two uint64_t words: the timer number followed by its period. */
SYSCTL_PROC(_kperf_timer, OID_AUTO, period,
CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED,
(void *)REQ_TIMER_PERIOD,
2 * sizeof(uint64_t), kperf_sysctl, "UQ",
"Timer number and period");
/* Two uint64_t words: the timer number followed by an action ID. */
SYSCTL_PROC(_kperf_timer, OID_AUTO, action,
CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED,
(void *)REQ_TIMER_ACTION,
2 * sizeof(uint64_t), kperf_sysctl, "UQ",
"Timer number and actionid");
/* Plain integer: the ID of the timer used for PET. */
SYSCTL_PROC(_kperf_timer, OID_AUTO, pet_timer,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
(void *)REQ_TIMER_PET,
sizeof(int), kperf_sysctl, "I", "Which timer ID does PET");
/*
 * kdebug trigger sysctls.  Both are served by kperf_sysctl, which selects
 * the operation from arg1 (a REQ_* value) and ignores arg2.
 */
SYSCTL_NODE(_kperf, OID_AUTO, kdebug, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
"kdebug");
/* Plain integer: the action ID. */
SYSCTL_PROC(_kperf_kdebug, OID_AUTO, action,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
(void*)REQ_KDEBUG_ACTION,
sizeof(int), kperf_sysctl, "I", "ID of action to trigger on kdebug events");
/*
 * The handler copies a whole filter structure in or out, sized from the
 * number of debugids or from the length of the request's new buffer.
 * NOTE(review): this entry is declared CTLTYPE_INT with sizeof(int) even
 * though the payload is a variable-sized structure -- confirm intent.
 */
SYSCTL_PROC(_kperf_kdebug, OID_AUTO, filter,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED,
(void*)REQ_KDEBUG_FILTER,
sizeof(int), kperf_sysctl, "P", "The filter that determines which kdebug events trigger a sample");
/*
 * Sysctls attached directly to the kperf node.  Unless noted otherwise they
 * are served by kperf_sysctl, which selects the operation from arg1 (a
 * REQ_* value) and ignores arg2.
 */
/* Non-zero enables sampling, zero disables it. */
SYSCTL_PROC(_kperf, OID_AUTO, sampling,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
(void *)REQ_SAMPLING,
sizeof(int), kperf_sysctl, "I", "Sampling running");
/* Writing a non-zero int resets kperf through ktrace_reset. */
SYSCTL_PROC(_kperf, OID_AUTO, reset,
CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED,
(void *)REQ_RESET,
0, kperf_sysctl, "-", "Reset kperf");
/*
 * Served by kperf_sysctl_bless_handler rather than kperf_sysctl; it is also
 * the only kperf_sysctl*-handled entry registered without CTLFLAG_ANYBODY.
 */
SYSCTL_PROC(_kperf, OID_AUTO, blessed_pid,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
(void *)REQ_BLESS,
sizeof(int), kperf_sysctl_bless_handler, "I", "Blessed pid");
/* Reads and writes ktrace_root_set_owner_allowed. */
SYSCTL_PROC(_kperf, OID_AUTO, blessed_preempt,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
(void *)REQ_BLESS_PREEMPT,
sizeof(int), kperf_sysctl, "I", "Blessed preemption");
SYSCTL_PROC(_kperf, OID_AUTO, kdbg_cswitch,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
(void *)REQ_KDBG_CSWITCH,
sizeof(int), kperf_sysctl, "I", "Generate context switch info");
SYSCTL_PROC(_kperf, OID_AUTO, pet_idle_rate,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
(void *)REQ_PET_IDLE_RATE,
sizeof(int), kperf_sysctl, "I",
"Rate at which unscheduled threads are forced to be sampled in "
"PET mode");
SYSCTL_PROC(_kperf, OID_AUTO, lightweight_pet,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
(void *)REQ_LIGHTWEIGHT_PET,
sizeof(int), kperf_sysctl, "I",
"Status of lightweight PET mode");
/* Bound directly to kperf_debug_level; no handler function is involved. */
SYSCTL_INT(_kperf, OID_AUTO, debug_level, CTLFLAG_RW | CTLFLAG_LOCKED,
&kperf_debug_level, 0, "debug level");
#if DEVELOPMENT || DEBUG
/* Read-only view of kperf_pending_ipis, on DEVELOPMENT and DEBUG builds only. */
SYSCTL_QUAD(_kperf, OID_AUTO, already_pending_ipis,
CTLFLAG_RD | CTLFLAG_LOCKED,
&kperf_pending_ipis, "");
#endif