#include <mach/mach_types.h>
#include <sys/errno.h>
#include <kperf/kperf.h>
#include <kperf/buffer.h>
#include <kperf/sample.h>
#include <kperf/context.h>
#include <kperf/action.h>
#include <kperf/pet.h>
#include <kperf/kptimer.h>
#include <kern/task.h>
#include <kern/kalloc.h>
#if defined(__x86_64__)
#include <i386/mp.h>
#endif
/* Serializes PET configuration and the sampler thread's run state. */
static LCK_MTX_DECLARE(kppet_mtx, &kperf_lck_grp);

/*
 * Global PET (profile-every-thread) state, protected by kppet_mtx unless
 * noted otherwise.
 */
static struct {
	unsigned int g_actionid;        /* action to run per sample; 0 means PET is off */
	uint32_t g_idle_rate;           /* full-callstack sample idle threads only every g_idle_rate passes */
	bool g_setup:1;                 /* whether the sampler thread has been created */
	bool g_lightweight:1;           /* lightweight mode: sample threads as they go on-CPU */
	struct kperf_sample *g_sample;  /* pre-allocated sample buffer for the PET thread */
	thread_t g_sample_thread;       /* dedicated kernel thread that runs the sampling passes */

	/* cached snapshot of a task's threads, grown as needed and reused */
	thread_t *g_threads;
	unsigned int g_nthreads;
	size_t g_threads_size;

	/* cached snapshot of eligible tasks, grown as needed and reused */
	task_t *g_tasks;
	unsigned int g_ntasks;
	size_t g_tasks_size;
} kppet = {
	.g_actionid = 0,
	.g_idle_rate = KPERF_PET_DEFAULT_IDLE_RATE,
};

/* true when lightweight PET is enabled and kperf is sampling; read on-CPU */
bool kppet_lightweight_active = false;
/* generation counter bumped per pass so each thread is sampled at most once */
_Atomic uint32_t kppet_gencount = 0;

static uint64_t kppet_sample_tasks(uint32_t idle_rate);
static void kppet_thread(void * param, wait_result_t wr);
/* Assert that the calling thread holds kppet_mtx. */
static void
kppet_lock_assert_owned(void)
{
	lck_mtx_assert(&kppet_mtx, LCK_MTX_ASSERT_OWNED);
}
/* Acquire the PET configuration mutex. */
static void
kppet_lock(void)
{
	lck_mtx_lock(&kppet_mtx);
}
/* Release the PET configuration mutex. */
static void
kppet_unlock(void)
{
	lck_mtx_unlock(&kppet_mtx);
}
/*
 * Lightweight PET hook, called as a thread goes on-CPU with interrupts
 * disabled.  Samples the thread only if its kperf_pet_gen lags the current
 * generation (kppet_gencount), so each thread is sampled at most once per
 * pass.  A non-NULL continuation marks a thread resuming at a continuation;
 * starting_fp is the frame pointer to begin the kernel callstack from.
 */
void
kppet_on_cpu(thread_t thread, thread_continue_t continuation,
    uintptr_t *starting_fp)
{
	assert(thread != NULL);
	assert(ml_get_interrupts_enabled() == FALSE);

	uint32_t actionid = kppet.g_actionid;
	if (actionid == 0) {
		return;
	}

	if (thread->kperf_pet_gen != atomic_load(&kppet_gencount)) {
		/*
		 * NOTE(review): this load is seq_cst while the others below
		 * are relaxed (atomic_load_explicit / os_atomic_load) --
		 * confirm whether a single ordering was intended.
		 */
		BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_START,
		    atomic_load_explicit(&kppet_gencount,
		    memory_order_relaxed), thread->kperf_pet_gen);

		task_t task = get_threadtask(thread);
		struct kperf_context ctx = {
			.cur_thread = thread,
			.cur_task = task,
			.cur_pid = task_pid(task),
			.starting_fp = starting_fp,
		};
		/* buffer reserved for interrupt/context-switch sampling */
		struct kperf_sample *sample = kperf_intr_sample_buffer();
		if (!sample) {
			BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_END, 1);
			return;
		}

		unsigned int flags = SAMPLE_FLAG_NON_INTERRUPT | SAMPLE_FLAG_PEND_USER;
		if (continuation != NULL) {
			flags |= SAMPLE_FLAG_CONTINUATION;
		}
		kperf_sample(sample, &ctx, actionid, flags);

		BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_END);
	} else {
		/* already sampled this generation -- just leave a trace record */
		BUF_VERB(PERF_PET_SAMPLE_THREAD,
		    os_atomic_load(&kppet_gencount, relaxed), thread->kperf_pet_gen);
	}
}
#pragma mark - state transitions
static void
kppet_setup(void)
{
if (kppet.g_setup) {
return;
}
kern_return_t kr = kernel_thread_start(kppet_thread, NULL,
&kppet.g_sample_thread);
if (kr != KERN_SUCCESS) {
panic("kperf: failed to create PET thread %d", kr);
}
thread_set_thread_name(kppet.g_sample_thread, "kperf-pet-sampling");
kppet.g_setup = true;
}
/*
 * Configure PET to run the given action, or tear everything down when
 * actionid is 0.  Ensures the sampler thread and sample buffer exist when
 * enabling, and frees all cached task/thread snapshots when disabling.
 */
void
kppet_config(unsigned int actionid)
{
	/*
	 * Resetting when PET was never set up is a no-op -- avoids creating
	 * the sampler thread just to immediately tear state down.
	 */
	if (actionid == 0 && !kppet.g_setup) {
		return;
	}

	kppet_setup();
	kppet_lock();

	kppet.g_actionid = actionid;

	if (actionid > 0) {
		if (!kppet.g_sample) {
			/*
			 * NOTE(review): the allocation result is not checked;
			 * a NULL g_sample would later be dereferenced by the
			 * sampler -- confirm this allocation cannot fail (or
			 * blocks until satisfied).
			 */
			kppet.g_sample = kalloc_tag(sizeof(*kppet.g_sample),
			    VM_KERN_MEMORY_DIAG);
		}
	} else {
		/* tearing down: release every cached buffer */
		if (kppet.g_tasks) {
			assert(kppet.g_tasks_size != 0);
			kfree(kppet.g_tasks, kppet.g_tasks_size);
			kppet.g_tasks = NULL;
			kppet.g_tasks_size = 0;
			kppet.g_ntasks = 0;
		}
		if (kppet.g_threads) {
			assert(kppet.g_threads_size != 0);
			kfree(kppet.g_threads, kppet.g_threads_size);
			kppet.g_threads = NULL;
			kppet.g_threads_size = 0;
			kppet.g_nthreads = 0;
		}
		if (kppet.g_sample != NULL) {
			kfree(kppet.g_sample, sizeof(*kppet.g_sample));
			kppet.g_sample = NULL;
		}
	}

	kppet_unlock();
}
/*
 * Restore PET to its default, disabled state: no action, default idle rate,
 * lightweight mode off.  The order matters: disabling the action first means
 * the lightweight setting can be changed without EBUSY.
 */
void
kppet_reset(void)
{
	kppet_config(0);
	kppet_set_idle_rate(KPERF_PET_DEFAULT_IDLE_RATE);
	kppet_set_lightweight_pet(0);
}
/* Wake the PET sampler thread (sleeping on &kppet) to start a pass. */
void
kppet_wake_thread(void)
{
	thread_wakeup(&kppet);
}
/*
 * Body of the dedicated PET sampling thread: sleep until woken while PET is
 * configured, run a full sampling pass, then report the pass duration to the
 * PET timer so it can pace the next pass.  Holds kppet_mtx at all times
 * except while asleep.  Never returns.
 */
__attribute__((noreturn))
static void
kppet_thread(void * __unused param, wait_result_t __unused wr)
{
	kppet_lock();

	for (;;) {
		BUF_INFO(PERF_PET_IDLE);

		/* sleep (dropping the mutex) until woken with PET enabled */
		do {
			(void)lck_mtx_sleep(&kppet_mtx, LCK_SLEEP_DEFAULT, &kppet,
			    THREAD_UNINT);
		} while (kppet.g_actionid == 0);

		BUF_INFO(PERF_PET_RUN);

		uint64_t sampledur_abs = kppet_sample_tasks(kppet.g_idle_rate);

		/* tell the PET timer how long this pass took */
		kptimer_pet_enter(sampledur_abs);
	}
}
#pragma mark - sampling
/*
 * Sample a single thread of a suspended task on behalf of the PET thread.
 * A thread that ran since the last pass ("dirty") always gets a full sample;
 * an idle thread only gets a full callstack every idle_rate passes and an
 * empty callstack otherwise.  idle_rate is used as a modulus and must be
 * non-zero.
 */
static void
kppet_sample_thread(int pid, task_t task, thread_t thread, uint32_t idle_rate)
{
	kppet_lock_assert_owned();

	uint32_t sample_flags = SAMPLE_FLAG_IDLE_THREADS |
	    SAMPLE_FLAG_THREAD_ONLY;

	BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_START);

	struct kperf_context ctx = {
		.cur_thread = thread,
		.cur_task = task,
		.cur_pid = pid,
	};

	/* consume the dirty bit, or rate-limit idle threads' callstacks */
	boolean_t thread_dirty = kperf_thread_get_dirty(thread);
	if (thread_dirty) {
		kperf_thread_set_dirty(thread, FALSE);
	} else if ((thread->kperf_pet_cnt % idle_rate) != 0) {
		sample_flags |= SAMPLE_FLAG_EMPTY_CALLSTACK;
	}
	thread->kperf_pet_cnt++;

	kperf_sample(kppet.g_sample, &ctx, kppet.g_actionid, sample_flags);
	kperf_sample_user(&kppet.g_sample->usample, &ctx, kppet.g_actionid,
	    sample_flags);

	BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_END);
}
/*
 * Snapshot a task's threads into kppet.g_threads, growing the cached buffer
 * as needed (the task lock must be dropped to allocate, so sizing retries in
 * a loop).  Takes a reference on every thread; the caller must deallocate
 * them.  Returns KERN_FAILURE for an inactive or thread-less task and
 * KERN_RESOURCE_SHORTAGE when the buffer cannot be grown.
 */
static kern_return_t
kppet_threads_prepare(task_t task)
{
	kppet_lock_assert_owned();

	vm_size_t threads_size_needed;

	for (;;) {
		task_lock(task);

		if (!task->active) {
			task_unlock(task);
			return KERN_FAILURE;
		}

		threads_size_needed = task->thread_count * sizeof(thread_t);
		if (threads_size_needed <= kppet.g_threads_size) {
			break;
		}

		/* buffer too small -- drop the lock, grow it, and retry */
		task_unlock(task);

		if (kppet.g_threads_size != 0) {
			kfree(kppet.g_threads, kppet.g_threads_size);
		}

		assert(threads_size_needed > 0);
		kppet.g_threads_size = threads_size_needed;

		kppet.g_threads = kalloc_tag(kppet.g_threads_size, VM_KERN_MEMORY_DIAG);
		if (kppet.g_threads == NULL) {
			kppet.g_threads_size = 0;
			return KERN_RESOURCE_SHORTAGE;
		}
	}

	/* still holding the task lock: reference every thread */
	thread_t thread;
	kppet.g_nthreads = 0;
	queue_iterate(&(task->threads), thread, thread_t, task_threads) {
		thread_reference_internal(thread);
		kppet.g_threads[kppet.g_nthreads++] = thread;
	}

	task_unlock(task);

	return (kppet.g_nthreads > 0) ? KERN_SUCCESS : KERN_FAILURE;
}
/*
 * Sample one task: emit a task-level sample if the action requests it, then
 * suspend the task and sample each of its threads.  The kernel task is never
 * sampled (it cannot be suspended).
 */
static void
kppet_sample_task(task_t task, uint32_t idle_rate)
{
	kppet_lock_assert_owned();
	assert(task != kernel_task);
	if (task == kernel_task) {
		return;
	}

	BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_START);

	int pid = task_pid(task);
	if (kperf_action_has_task(kppet.g_actionid)) {
		struct kperf_context ctx = {
			.cur_task = task,
			.cur_pid = pid,
		};

		kperf_sample(kppet.g_sample, &ctx, kppet.g_actionid,
		    SAMPLE_FLAG_TASK_ONLY);
	}

	if (!kperf_action_has_thread(kppet.g_actionid)) {
		BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_END);
		return;
	}

	/* suspend the task so its thread list stays stable while sampling */
	kern_return_t kr = task_suspend_internal(task);
	if (kr != KERN_SUCCESS) {
		BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_END, 1);
		return;
	}

	kr = kppet_threads_prepare(task);
	if (kr != KERN_SUCCESS) {
		BUF_INFO(PERF_PET_ERROR, ERR_THREAD, kr);
		goto out;
	}

	for (unsigned int i = 0; i < kppet.g_nthreads; i++) {
		thread_t thread = kppet.g_threads[i];
		assert(thread != THREAD_NULL);

		kppet_sample_thread(pid, task, thread, idle_rate);

		/* drop the reference taken by kppet_threads_prepare */
		thread_deallocate(kppet.g_threads[i]);
	}

out:
	task_resume_internal(task);

	BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_END, kppet.g_nthreads);
}
/*
 * Snapshot every eligible (non-kernel) task into kppet.g_tasks, growing the
 * cached buffer as needed.  tasks_threads_lock must be dropped to allocate,
 * so sizing retries in a loop; the loop exits with the lock held.  Takes a
 * reference on each task; the caller must deallocate them.  Returns
 * KERN_RESOURCE_SHORTAGE when the buffer cannot be grown.
 */
static kern_return_t
kppet_tasks_prepare(void)
{
	kppet_lock_assert_owned();

	vm_size_t size_needed = 0;
	for (;;) {
		lck_mtx_lock(&tasks_threads_lock);

		size_needed = tasks_count * sizeof(task_t);
		if (size_needed <= kppet.g_tasks_size) {
			break;
		}

		/* buffer too small -- drop the lock, grow it, and retry */
		lck_mtx_unlock(&tasks_threads_lock);

		/*
		 * size_needed > kppet.g_tasks_size is guaranteed here (the
		 * loop would have broken otherwise), so reallocate
		 * unconditionally.
		 */
		if (kppet.g_tasks_size != 0) {
			kfree(kppet.g_tasks, kppet.g_tasks_size);
		}

		assert(size_needed > 0);
		kppet.g_tasks_size = size_needed;

		kppet.g_tasks = kalloc_tag(kppet.g_tasks_size, VM_KERN_MEMORY_DIAG);
		if (!kppet.g_tasks) {
			kppet.g_tasks_size = 0;
			return KERN_RESOURCE_SHORTAGE;
		}
	}

	/* still holding tasks_threads_lock: reference every eligible task */
	task_t task = TASK_NULL;
	kppet.g_ntasks = 0;

	queue_iterate(&tasks, task, task_t, tasks) {
		bool eligible_task = task != kernel_task;
		if (eligible_task) {
			task_reference_internal(task);
			kppet.g_tasks[kppet.g_ntasks++] = task;
		}
	}

	lck_mtx_unlock(&tasks_threads_lock);

	return KERN_SUCCESS;
}
/*
 * Run one full sampling pass over every eligible task and return the pass
 * duration in mach absolute time units, which the caller feeds to the PET
 * timer to pace the next pass.  Errors preparing the task list are traced
 * and still yield a valid duration.
 */
static uint64_t
kppet_sample_tasks(uint32_t idle_rate)
{
	kppet_lock_assert_owned();
	assert(kppet.g_actionid > 0);

	uint64_t start_abs = mach_absolute_time();

	BUF_INFO(PERF_PET_SAMPLE | DBG_FUNC_START);

	kern_return_t kr = kppet_tasks_prepare();
	if (kr != KERN_SUCCESS) {
		BUF_INFO(PERF_PET_ERROR, ERR_TASK, kr);
		BUF_INFO(PERF_PET_SAMPLE | DBG_FUNC_END);
		return mach_absolute_time() - start_abs;
	}

	for (unsigned int i = 0; i < kppet.g_ntasks; i++) {
		task_t task = kppet.g_tasks[i];
		assert(task != TASK_NULL);

		kppet_sample_task(task, idle_rate);

		/* drop the reference taken by kppet_tasks_prepare */
		task_deallocate(task);
		kppet.g_tasks[i] = TASK_NULL;
	}

	BUF_INFO(PERF_PET_SAMPLE | DBG_FUNC_END, kppet.g_ntasks);
	kppet.g_ntasks = 0;

	return mach_absolute_time() - start_abs;
}
#pragma mark - sysctl accessors
/* Sysctl accessor: report the current idle-thread sampling rate. */
int
kppet_get_idle_rate(void)
{
	return (int)kppet.g_idle_rate;
}
int
kppet_set_idle_rate(int new_idle_rate)
{
kppet.g_idle_rate = new_idle_rate;
return 0;
}
void
kppet_lightweight_active_update(void)
{
kppet_lightweight_active = (kperf_is_sampling() && kppet.g_lightweight);
kperf_on_cpu_update();
}
/* Sysctl accessor: report whether lightweight PET is enabled (1) or not (0). */
int
kppet_get_lightweight_pet(void)
{
	return kppet.g_lightweight ? 1 : 0;
}
int
kppet_set_lightweight_pet(int on)
{
if (kperf_is_sampling()) {
return EBUSY;
}
kppet.g_lightweight = (on == 1);
kppet_lightweight_active_update();
return 0;
}