#include <mach/mach_types.h>
#include <mach/machine.h>
#include <machine/machine_routines.h>
#include <machine/sched_param.h>
#include <machine/machine_cpu.h>
#include <kern/kern_types.h>
#include <kern/debug.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_clutch.h>
#include <machine/atomic.h>
#include <kern/sched_clutch.h>
#include <sys/kdebug.h>
#if CONFIG_SCHED_CLUTCH
/* Root-level hierarchy (one per pset) management routines */
static void sched_clutch_root_init(sched_clutch_root_t, processor_set_t);
static void sched_clutch_root_bucket_init(sched_clutch_root_bucket_t, sched_bucket_t);
static void sched_clutch_root_pri_update(sched_clutch_root_t);
static sched_clutch_root_bucket_t sched_clutch_root_highest_root_bucket(sched_clutch_root_t, uint64_t);
static void sched_clutch_root_urgency_inc(sched_clutch_root_t, thread_t);
static void sched_clutch_root_urgency_dec(sched_clutch_root_t, thread_t);
/* Root bucket EDF deadline / warp management routines */
static uint64_t sched_clutch_root_bucket_deadline_calculate(sched_clutch_root_bucket_t, uint64_t);
static void sched_clutch_root_bucket_deadline_update(sched_clutch_root_bucket_t, sched_clutch_root_t, uint64_t);
static int sched_clutch_root_bucket_pri_compare(sched_clutch_root_bucket_t, sched_clutch_root_bucket_t);
/* Clutch bucket (per thread-group, per QoS bucket) management routines */
static void sched_clutch_bucket_hierarchy_insert(sched_clutch_root_t, sched_clutch_bucket_t, sched_bucket_t, uint64_t);
static void sched_clutch_bucket_hierarchy_remove(sched_clutch_root_t, sched_clutch_bucket_t, sched_bucket_t, uint64_t);
static boolean_t sched_clutch_bucket_runnable(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t);
static boolean_t sched_clutch_bucket_update(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t);
static void sched_clutch_bucket_empty(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t);
static void sched_clutch_bucket_cpu_usage_update(sched_clutch_bucket_t, uint64_t);
static void sched_clutch_bucket_cpu_blocked_update(sched_clutch_bucket_t, uint64_t);
static uint8_t sched_clutch_bucket_pri_calculate(sched_clutch_bucket_t, uint64_t);
static sched_clutch_bucket_t sched_clutch_root_bucket_highest_clutch_bucket(sched_clutch_root_bucket_t);
/* Run-count accounting for CPU usage tracking */
static uint32_t sched_clutch_run_bucket_incr(sched_clutch_t, sched_bucket_t);
static uint32_t sched_clutch_run_bucket_decr(sched_clutch_t, sched_bucket_t);
static void sched_clutch_bucket_cpu_adjust(sched_clutch_bucket_t);
static void sched_clutch_bucket_timeshare_update(sched_clutch_bucket_t);
static boolean_t sched_thread_sched_pri_promoted(thread_t);
/* Thread insertion/removal/selection in the hierarchy */
static boolean_t sched_clutch_thread_insert(sched_clutch_root_t, thread_t, integer_t);
static void sched_clutch_thread_remove(sched_clutch_root_t, thread_t, uint64_t);
static thread_t sched_clutch_thread_highest(sched_clutch_root_t);
static uint32_t sched_clutch_root_urgency(sched_clutch_root_t);
static uint32_t sched_clutch_root_count_sum(sched_clutch_root_t);
static int sched_clutch_root_priority(sched_clutch_root_t);
static inline void sched_clutch_hierarchy_locked_assert(sched_clutch_root_t);
/* Comparator for the root bucket EDF min-heap; built in sched_clutch_root_init() */
priority_queue_compare_fn_t sched_clutch_root_bucket_compare;
/* Sentinel values indicating "invalid/unset timestamp" */
#define SCHED_CLUTCH_INVALID_TIME_32 ((uint32_t)~0)
#define SCHED_CLUTCH_INVALID_TIME_64 ((uint64_t)~0)
/*
 * Worst-case execution latency (us) per root bucket, indexed by
 * sched_bucket_t. Used to compute the EDF deadline for each root
 * bucket. The first (above-UI/fixpri) entry is invalid since that
 * bucket is not scheduled via EDF.
 */
static uint32_t sched_clutch_root_bucket_wcel_us[TH_BUCKET_SCHED_MAX] = {
	SCHED_CLUTCH_INVALID_TIME_32,  /* FIXPRI (above-UI): no EDF deadline */
	0,                             /* FG */
	37500,                         /* IN */
	75000,                         /* DF */
	150000,                        /* UT */
	250000                         /* BG */
};
/* WCEL values converted to mach absolute time at init (see sched_clutch_us_to_abstime) */
static uint64_t sched_clutch_root_bucket_wcel[TH_BUCKET_SCHED_MAX] = {0};
/* Sentinel meaning "bucket is not currently inside a warp window" */
#define SCHED_CLUTCH_ROOT_BUCKET_WARP_UNUSED (SCHED_CLUTCH_INVALID_TIME_64)
/*
 * Warp budget (us) per root bucket: how long a higher QoS bucket may
 * override pure EDF ordering to get low scheduling latency.
 */
static uint32_t sched_clutch_root_bucket_warp_us[TH_BUCKET_SCHED_MAX] = {
	SCHED_CLUTCH_INVALID_TIME_32,  /* FIXPRI (above-UI) */
	8000,                          /* FG */
	4000,                          /* IN */
	2000,                          /* DF */
	1000,                          /* UT */
	0                              /* BG: no warp */
};
/* Warp values converted to mach absolute time at init */
static uint64_t sched_clutch_root_bucket_warp[TH_BUCKET_SCHED_MAX] = {0};
/* Thread quantum (us) per scheduling bucket; lower QoS gets shorter quanta */
static uint32_t sched_clutch_thread_quantum_us[TH_BUCKET_SCHED_MAX] = {
	10000,                         /* FIXPRI (above-UI) */
	10000,                         /* FG */
	8000,                          /* IN */
	6000,                          /* DF */
	4000,                          /* UT */
	2000                           /* BG */
};
/* Quantum values converted to mach absolute time at init */
static uint64_t sched_clutch_thread_quantum[TH_BUCKET_SCHED_MAX] = {0};
/* Hierarchy element states, used for kdebug tracing */
enum sched_clutch_state {
	SCHED_CLUTCH_STATE_EMPTY = 0,
	SCHED_CLUTCH_STATE_RUNNABLE,
};
/*
 * sched_clutch_us_to_abstime()
 *
 * Converts an array of microsecond tunables to mach absolute time,
 * propagating the 32-bit invalid sentinel as the 64-bit one.
 */
static void
sched_clutch_us_to_abstime(uint32_t *us_vals, uint64_t *abstime_vals)
{
	int idx = 0;
	while (idx < TH_BUCKET_SCHED_MAX) {
		uint32_t us = us_vals[idx];
		if (us != SCHED_CLUTCH_INVALID_TIME_32) {
			clock_interval_to_absolutetime_interval(us,
			    NSEC_PER_USEC, &abstime_vals[idx]);
		} else {
			abstime_vals[idx] = SCHED_CLUTCH_INVALID_TIME_64;
		}
		idx++;
	}
}
/*
 * sched_clutch_hierarchy_locked_assert()
 *
 * Asserts that the pset lock protecting the clutch hierarchy is held.
 * Compiles to a no-op on release kernels.
 */
#if DEVELOPMENT || DEBUG
static inline void
sched_clutch_hierarchy_locked_assert(
	sched_clutch_root_t root_clutch)
{
	pset_assert_locked(root_clutch->scr_pset);
}
#else
static inline void
sched_clutch_hierarchy_locked_assert(
	__unused sched_clutch_root_t root_clutch)
{
}
#endif
/*
 * sched_clutch_thr_count_inc()
 *
 * Increments a 16-bit thread count, panicking on overflow rather than
 * letting the counter wrap (which would corrupt hierarchy accounting).
 */
static void
sched_clutch_thr_count_inc(
	uint16_t *thr_count)
{
	if (__improbable(os_inc_overflow(thr_count))) {
		panic("sched_clutch thread count overflowed!");
	}
}
/*
 * sched_clutch_thr_count_dec()
 *
 * Decrements a 16-bit thread count, panicking on underflow (a decrement
 * without a matching increment indicates an accounting bug).
 */
static void
sched_clutch_thr_count_dec(
	uint16_t *thr_count)
{
	if (__improbable(os_dec_overflow(thr_count))) {
		panic("sched_clutch thread count underflowed!");
	}
}
/*
 * sched_clutch_root_init()
 *
 * Initializes the scheduler hierarchy root for a pset: counters,
 * list/priority queues, runnable and warp bitmaps, and the per-QoS
 * root buckets.
 */
static void
sched_clutch_root_init(
	sched_clutch_root_t root_clutch,
	processor_set_t pset)
{
	root_clutch->scr_thr_count = 0;
	root_clutch->scr_priority = NOPRI;
	root_clutch->scr_urgency = 0;
	root_clutch->scr_pset = pset;
	/* List of all runnable clutch buckets; also used for foreign-bucket tracking */
	queue_init(&root_clutch->scr_clutch_buckets);
	queue_init(&root_clutch->scr_foreign_buckets);
	/*
	 * Comparator for the root bucket EDF heap. Returns 1 when a's deadline
	 * is earlier (i.e. earlier deadline == higher heap priority), so the
	 * min-heap surfaces the bucket with the earliest deadline.
	 * NOTE(review): this global is (re)assigned on every root init; the
	 * comparator is stateless so repeated assignment is benign.
	 */
	sched_clutch_root_bucket_compare = priority_heap_make_comparator(a, b, struct sched_clutch_root_bucket, scrb_pqlink, {
		return (a->scrb_deadline < b->scrb_deadline) ? 1 : ((a->scrb_deadline == b->scrb_deadline) ? 0 : -1);
	});
	priority_queue_init(&root_clutch->scr_root_buckets, PRIORITY_QUEUE_GENERIC_KEY | PRIORITY_QUEUE_MIN_HEAP);
	bitmap_zero(root_clutch->scr_runnable_bitmap, TH_BUCKET_SCHED_MAX);
	bitmap_zero(root_clutch->scr_warp_available, TH_BUCKET_SCHED_MAX);
	for (uint32_t i = 0; i < TH_BUCKET_SCHED_MAX; i++) {
		sched_clutch_root_bucket_init(&root_clutch->scr_buckets[i], i);
	}
}
/*
 * sched_clutch_root_bucket_init()
 *
 * Initializes a root bucket (one per sched_bucket_t per pset): its
 * clutch bucket max-heap, EDF linkage, deadline, and warp budget.
 */
static void
sched_clutch_root_bucket_init(
	sched_clutch_root_bucket_t root_bucket,
	sched_bucket_t bucket)
{
	root_bucket->scrb_bucket = bucket;
	priority_queue_init(&root_bucket->scrb_clutch_buckets, PRIORITY_QUEUE_BUILTIN_KEY | PRIORITY_QUEUE_MAX_HEAP);
	priority_queue_entry_init(&root_bucket->scrb_pqlink);
	root_bucket->scrb_deadline = SCHED_CLUTCH_INVALID_TIME_64;
	/*
	 * NOTE(review): 0 here is distinct from SCHED_CLUTCH_ROOT_BUCKET_WARP_UNUSED;
	 * sched_clutch_root_bucket_empty() treats any non-UNUSED value as a
	 * started warp window, so a never-warped bucket can have its warp
	 * budget zeroed on first empty transition — confirm this is intended.
	 */
	root_bucket->scrb_warped_deadline = 0;
	root_bucket->scrb_warp_remaining = sched_clutch_root_bucket_warp[root_bucket->scrb_bucket];
}
/*
 * sched_clutch_root_bucket_pri_compare()
 *
 * Compares two root buckets by the priority of their highest clutch
 * bucket. Returns 1/0/-1 as a's priority is greater/equal/less than b's.
 * NOTE(review): assumes both root buckets are non-empty; the result of
 * sched_clutch_root_bucket_highest_clutch_bucket() (NULL when empty) is
 * dereferenced without a check — callers must only pass runnable buckets.
 */
static int
sched_clutch_root_bucket_pri_compare(
	sched_clutch_root_bucket_t a,
	sched_clutch_root_bucket_t b)
{
	sched_clutch_bucket_t a_highest = sched_clutch_root_bucket_highest_clutch_bucket(a);
	sched_clutch_bucket_t b_highest = sched_clutch_root_bucket_highest_clutch_bucket(b);
	return (a_highest->scb_priority > b_highest->scb_priority) ?
	       1 : ((a_highest->scb_priority == b_highest->scb_priority) ? 0 : -1);
}
/*
 * sched_clutch_root_select_aboveui()
 *
 * Decides whether the above-UI (fixpri) root bucket should be selected
 * ahead of the EDF hierarchy. Chosen when it is runnable and either the
 * FG timeshare bucket is empty or above-UI's highest clutch bucket
 * priority is at least as high as FG's.
 */
static boolean_t
sched_clutch_root_select_aboveui(
	sched_clutch_root_t root_clutch)
{
	if (!bitmap_test(root_clutch->scr_runnable_bitmap, TH_BUCKET_FIXPRI)) {
		/* Nothing above-UI is runnable */
		return false;
	}
	if (!bitmap_test(root_clutch->scr_runnable_bitmap, TH_BUCKET_SHARE_FG)) {
		/* Above-UI runnable and no FG competition */
		return true;
	}
	sched_clutch_root_bucket_t bucket_aboveui = &root_clutch->scr_buckets[TH_BUCKET_FIXPRI];
	sched_clutch_root_bucket_t bucket_sharefg = &root_clutch->scr_buckets[TH_BUCKET_SHARE_FG];
	return sched_clutch_root_bucket_pri_compare(bucket_aboveui, bucket_sharefg) >= 0;
}
/*
 * sched_clutch_root_highest_root_bucket()
 *
 * Selects the root bucket to run next: above-UI if it wins, otherwise
 * the earliest-deadline (EDF) bucket — unless a higher QoS bucket has
 * warp budget available, in which case that bucket "warps" ahead of
 * EDF order for low latency. Returns NULL if nothing is runnable.
 */
static sched_clutch_root_bucket_t
sched_clutch_root_highest_root_bucket(
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	if (bitmap_lsb_first(root_clutch->scr_runnable_bitmap, TH_BUCKET_SCHED_MAX) == -1) {
		/* Hierarchy is empty */
		return NULL;
	}
	if (sched_clutch_root_select_aboveui(root_clutch)) {
		return &root_clutch->scr_buckets[TH_BUCKET_FIXPRI];
	}
	/* EDF candidate: runnable root bucket with the earliest deadline */
	sched_clutch_root_bucket_t edf_bucket = priority_queue_min(&root_clutch->scr_root_buckets, struct sched_clutch_root_bucket, scrb_pqlink);
	sched_clutch_root_bucket_t warp_bucket = NULL;
	int warp_bucket_index = -1;
evaluate_warp_buckets:
	/* Highest QoS (lowest index) bucket with warp budget remaining */
	warp_bucket_index = bitmap_lsb_first(root_clutch->scr_warp_available, TH_BUCKET_SCHED_MAX);
	if ((warp_bucket_index == -1) || (warp_bucket_index >= edf_bucket->scrb_bucket)) {
		/*
		 * No higher bucket can warp ahead; select the EDF winner,
		 * push its deadline out and refill its warp budget.
		 */
		sched_clutch_root_bucket_deadline_update(edf_bucket, root_clutch, timestamp);
		edf_bucket->scrb_warp_remaining = sched_clutch_root_bucket_warp[edf_bucket->scrb_bucket];
		edf_bucket->scrb_warped_deadline = SCHED_CLUTCH_ROOT_BUCKET_WARP_UNUSED;
		bitmap_set(root_clutch->scr_warp_available, edf_bucket->scrb_bucket);
		return edf_bucket;
	}
	warp_bucket = &root_clutch->scr_buckets[warp_bucket_index];
	if (warp_bucket->scrb_warped_deadline == SCHED_CLUTCH_ROOT_BUCKET_WARP_UNUSED) {
		/* Open a new warp window ending after the remaining warp budget */
		warp_bucket->scrb_warped_deadline = timestamp + warp_bucket->scrb_warp_remaining;
		sched_clutch_root_bucket_deadline_update(warp_bucket, root_clutch, timestamp);
		return warp_bucket;
	}
	if (warp_bucket->scrb_warped_deadline > timestamp) {
		/* Warp window still open; keep warping */
		sched_clutch_root_bucket_deadline_update(warp_bucket, root_clutch, timestamp);
		return warp_bucket;
	}
	/* Warp budget exhausted; retire it and retry with the next warp candidate */
	warp_bucket->scrb_warp_remaining = 0;
	bitmap_clear(root_clutch->scr_warp_available, warp_bucket->scrb_bucket);
	goto evaluate_warp_buckets;
}
/*
 * sched_clutch_root_bucket_deadline_calculate()
 *
 * Returns the EDF deadline for a root bucket: now plus the bucket's
 * worst-case execution latency. The above-UI bucket is not scheduled
 * via EDF, so its deadline is always 0.
 */
static uint64_t
sched_clutch_root_bucket_deadline_calculate(
	sched_clutch_root_bucket_t root_bucket,
	uint64_t timestamp)
{
	return (root_bucket->scrb_bucket < TH_BUCKET_SHARE_FG) ? 0 :
	       (timestamp + sched_clutch_root_bucket_wcel[root_bucket->scrb_bucket]);
}
/*
 * sched_clutch_root_bucket_deadline_update()
 *
 * Recomputes a root bucket's EDF deadline and repositions it in the
 * root EDF heap. Deadlines only move forward (asserted), and since the
 * heap comparator treats an earlier deadline as higher priority, a
 * later deadline is a key *decrease* in heap terms.
 */
static void
sched_clutch_root_bucket_deadline_update(
	sched_clutch_root_bucket_t root_bucket,
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	if (root_bucket->scrb_bucket == TH_BUCKET_FIXPRI) {
		/* Above-UI bucket has no EDF deadline */
		return;
	}
	uint64_t old_deadline = root_bucket->scrb_deadline;
	uint64_t new_deadline = sched_clutch_root_bucket_deadline_calculate(root_bucket, timestamp);
	assert(old_deadline <= new_deadline);
	if (old_deadline != new_deadline) {
		root_bucket->scrb_deadline = new_deadline;
		priority_queue_entry_decrease(&root_clutch->scr_root_buckets, &root_bucket->scrb_pqlink, PRIORITY_QUEUE_KEY_NONE, sched_clutch_root_bucket_compare);
	}
}
/*
 * sched_clutch_root_bucket_runnable()
 *
 * Called when a root bucket gains its first runnable clutch bucket:
 * marks it runnable, computes its EDF deadline, inserts it into the
 * root EDF heap, and advertises warp availability if budget remains.
 */
static void
sched_clutch_root_bucket_runnable(
	sched_clutch_root_bucket_t root_bucket,
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	bitmap_set(root_clutch->scr_runnable_bitmap, root_bucket->scrb_bucket);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_ROOT_BUCKET_STATE) | DBG_FUNC_NONE,
	    root_bucket->scrb_bucket, SCHED_CLUTCH_STATE_RUNNABLE, 0, 0, 0);
	if (root_bucket->scrb_bucket == TH_BUCKET_FIXPRI) {
		/* Above-UI bucket never participates in EDF/warp */
		return;
	}
	root_bucket->scrb_deadline = sched_clutch_root_bucket_deadline_calculate(root_bucket, timestamp);
	priority_queue_insert(&root_clutch->scr_root_buckets, &root_bucket->scrb_pqlink, PRIORITY_QUEUE_KEY_NONE, sched_clutch_root_bucket_compare);
	if (root_bucket->scrb_warp_remaining) {
		/* Bucket still has warp budget; allow it to warp past EDF order */
		bitmap_set(root_clutch->scr_warp_available, root_bucket->scrb_bucket);
	}
}
/*
 * sched_clutch_root_bucket_empty()
 *
 * Called when a root bucket loses its last runnable clutch bucket:
 * removes it from the runnable bitmap and EDF heap, and snapshots the
 * unused portion of an open warp window so it carries over to the next
 * runnable period.
 */
static void
sched_clutch_root_bucket_empty(
	sched_clutch_root_bucket_t root_bucket,
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	bitmap_clear(root_clutch->scr_runnable_bitmap, root_bucket->scrb_bucket);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_ROOT_BUCKET_STATE) | DBG_FUNC_NONE,
	    root_bucket->scrb_bucket, SCHED_CLUTCH_STATE_EMPTY, 0, 0, 0);
	if (root_bucket->scrb_bucket == TH_BUCKET_FIXPRI) {
		return;
	}
	priority_queue_remove(&root_clutch->scr_root_buckets, &root_bucket->scrb_pqlink, sched_clutch_root_bucket_compare);
	bitmap_clear(root_clutch->scr_warp_available, root_bucket->scrb_bucket);
	if (root_bucket->scrb_warped_deadline > timestamp) {
		/* Warp window still open: bank the remaining budget */
		root_bucket->scrb_warp_remaining = root_bucket->scrb_warped_deadline - timestamp;
	} else if (root_bucket->scrb_warped_deadline != SCHED_CLUTCH_ROOT_BUCKET_WARP_UNUSED) {
		/* Warp window expired while runnable: budget fully consumed */
		root_bucket->scrb_warp_remaining = 0;
	}
}
/*
 * sched_clutch_root_pri_update()
 *
 * Recomputes the root hierarchy priority: the maximum base/sched pri
 * among threads of the highest clutch bucket of the selected root
 * bucket (above-UI if it wins, else the highest runnable bucket).
 */
static void
sched_clutch_root_pri_update(
	sched_clutch_root_t root_clutch)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	if (bitmap_lsb_first(root_clutch->scr_runnable_bitmap, TH_BUCKET_SCHED_MAX) == -1) {
		/* Nothing runnable in the hierarchy */
		root_clutch->scr_priority = NOPRI;
		assert(root_clutch->scr_urgency == 0);
		return;
	}
	sched_clutch_root_bucket_t root_bucket = NULL;
	if (sched_clutch_root_select_aboveui(root_clutch)) {
		root_bucket = &root_clutch->scr_buckets[TH_BUCKET_FIXPRI];
	} else {
		/* Highest runnable bucket other than above-UI */
		int root_bucket_index = bitmap_lsb_next(root_clutch->scr_runnable_bitmap, TH_BUCKET_SCHED_MAX, TH_BUCKET_FIXPRI);
		assert(root_bucket_index != -1);
		root_bucket = &root_clutch->scr_buckets[root_bucket_index];
	}
	sched_clutch_bucket_t clutch_bucket = sched_clutch_root_bucket_highest_clutch_bucket(root_bucket);
	root_clutch->scr_priority = priority_queue_max_key(&clutch_bucket->scb_clutchpri_prioq);
}
/*
 * sched_clutch_root_urgency_inc()
 *
 * Bumps the root urgency count when an urgent-priority thread becomes
 * runnable in the hierarchy.
 */
static void
sched_clutch_root_urgency_inc(
	sched_clutch_root_t root_clutch,
	thread_t thread)
{
	if (!SCHED(priority_is_urgent)(thread->sched_pri)) {
		return;
	}
	root_clutch->scr_urgency++;
}
/*
 * sched_clutch_root_urgency_dec()
 *
 * Drops the root urgency count when an urgent-priority thread leaves
 * the hierarchy.
 */
static void
sched_clutch_root_urgency_dec(
	sched_clutch_root_t root_clutch,
	thread_t thread)
{
	if (!SCHED(priority_is_urgent)(thread->sched_pri)) {
		return;
	}
	root_clutch->scr_urgency--;
}
/* Base interactivity priority; interactivity scores range over [0, 2*this] */
#define SCHED_CLUTCH_BUCKET_INTERACTIVE_PRI_DEFAULT (8)
uint8_t sched_clutch_bucket_interactive_pri = SCHED_CLUTCH_BUCKET_INTERACTIVE_PRI_DEFAULT;
/* CPU used+blocked total (abstime, set at init) beyond which both counters are scaled down */
uint64_t sched_clutch_bucket_adjust_threshold = 0;
#define SCHED_CLUTCH_BUCKET_ADJUST_THRESHOLD_USECS (500000)
/* Divisor applied to cpu_used/cpu_blocked when the adjust threshold is crossed */
#define SCHED_CLUTCH_BUCKET_ADJUST_RATIO (10)
/* Minimum interval (abstime, set at init) between interactivity score recalculations */
uint64_t sched_clutch_bucket_interactivity_delta = 0;
#define SCHED_CLUTCH_BUCKET_INTERACTIVITY_DELTA_USECS_DEFAULT (25000)
/* Priority boost for above-UI buckets and thread groups above low priority */
#define SCHED_CLUTCH_BUCKET_PRI_BOOST_DEFAULT (8)
uint8_t sched_clutch_bucket_pri_boost = SCHED_CLUTCH_BUCKET_PRI_BOOST_DEFAULT;
/* Sentinel for "clutch bucket has no blocked timestamp recorded" */
#define SCHED_CLUTCH_BUCKET_BLOCKED_TS_INVALID (uint32_t)(~0)
/*
 * sched_clutch_bucket_init()
 *
 * Initializes a clutch bucket (per thread-group, per sched bucket):
 * zeroes the structure, then sets up scoring state, the thread
 * priority queue, and the thread run queue.
 */
static void
sched_clutch_bucket_init(
	sched_clutch_bucket_t clutch_bucket,
	sched_clutch_t clutch,
	sched_bucket_t bucket)
{
	bzero(clutch_bucket, sizeof(struct sched_clutch_bucket));
	clutch_bucket->scb_bucket = bucket;
	clutch_bucket->scb_priority = 0;
	/* Start at the maximum interactivity score (most favorable) */
	clutch_bucket->scb_interactivity_score = (sched_clutch_bucket_interactive_pri * 2);
	clutch_bucket->scb_foreign = false;
	os_atomic_store(&clutch_bucket->scb_timeshare_tick, 0, relaxed);
	/* INT8_MAX pri_shift disables timeshare decay until first update */
	os_atomic_store(&clutch_bucket->scb_pri_shift, INT8_MAX, relaxed);
	clutch_bucket->scb_interactivity_ts = 0;
	clutch_bucket->scb_blocked_ts = SCHED_CLUTCH_BUCKET_BLOCKED_TS_INVALID;
	priority_queue_entry_init(&clutch_bucket->scb_pqlink);
	clutch_bucket->scb_clutch = clutch;
	clutch_bucket->scb_root = NULL;
	priority_queue_init(&clutch_bucket->scb_clutchpri_prioq, PRIORITY_QUEUE_BUILTIN_KEY | PRIORITY_QUEUE_MAX_HEAP);
	run_queue_init(&clutch_bucket->scb_runq);
}
/*
 * sched_clutch_init_with_thread_group()
 *
 * Initializes a sched_clutch for a thread group: zeroes the thread
 * count and TG priority, records the owning thread group, and sets up
 * one clutch bucket per scheduling bucket.
 */
void
sched_clutch_init_with_thread_group(
	sched_clutch_t clutch,
	struct thread_group *tg)
{
	os_atomic_store(&clutch->sc_thr_count, 0, relaxed);
	clutch->sc_tg = tg;
	os_atomic_store(&clutch->sc_tg_priority, 0, relaxed);
	for (uint32_t bucket = 0; bucket < TH_BUCKET_SCHED_MAX; bucket++) {
		sched_clutch_bucket_init(&clutch->sc_clutch_buckets[bucket], clutch, bucket);
	}
}
/*
 * sched_clutch_destroy()
 *
 * Tears down a sched_clutch; only sanity-checks that no threads remain
 * accounted to it (no dynamic state to free).
 */
void
sched_clutch_destroy(
	__unused sched_clutch_t clutch)
{
	assert(os_atomic_load(&clutch->sc_thr_count, relaxed) == 0);
}
/*
 * sched_clutch_bucket_hierarchy_insert()
 *
 * Inserts a clutch bucket into the hierarchy under its root bucket,
 * making the root bucket runnable if this is its first clutch bucket.
 */
static void
sched_clutch_bucket_hierarchy_insert(
	sched_clutch_root_t root_clutch,
	sched_clutch_bucket_t clutch_bucket,
	sched_bucket_t bucket,
	uint64_t timestamp)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	if (bucket > TH_BUCKET_FIXPRI) {
		/* Track all timeshare clutch buckets on the root list (used for scans) */
		enqueue_tail(&root_clutch->scr_clutch_buckets, &clutch_bucket->scb_listlink);
	}
	sched_clutch_root_bucket_t root_bucket = &root_clutch->scr_buckets[bucket];
	/* First clutch bucket in this root bucket: make the root bucket runnable */
	if (priority_queue_empty(&root_bucket->scrb_clutch_buckets)) {
		sched_clutch_root_bucket_runnable(root_bucket, root_clutch, timestamp);
	}
	priority_queue_insert(&root_bucket->scrb_clutch_buckets, &clutch_bucket->scb_pqlink, clutch_bucket->scb_priority, PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
	os_atomic_store(&clutch_bucket->scb_root, root_clutch, relaxed);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_TG_BUCKET_STATE) | DBG_FUNC_NONE,
	    thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, SCHED_CLUTCH_STATE_RUNNABLE, clutch_bucket->scb_priority, 0);
}
/*
 * sched_clutch_bucket_hierarchy_remove()
 *
 * Removes a clutch bucket from the hierarchy, recording its blocked
 * timestamp (for interactivity scoring) and emptying the root bucket
 * if this was its last clutch bucket.
 */
static void
sched_clutch_bucket_hierarchy_remove(
	sched_clutch_root_t root_clutch,
	sched_clutch_bucket_t clutch_bucket,
	sched_bucket_t bucket,
	uint64_t timestamp)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	if (bucket > TH_BUCKET_FIXPRI) {
		remqueue(&clutch_bucket->scb_listlink);
	}
	sched_clutch_root_bucket_t root_bucket = &root_clutch->scr_buckets[bucket];
	priority_queue_remove(&root_bucket->scrb_clutch_buckets, &clutch_bucket->scb_pqlink, PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
	os_atomic_store(&clutch_bucket->scb_root, NULL, relaxed);
	/* Remember when this bucket went empty; blocked time feeds interactivity score */
	clutch_bucket->scb_blocked_ts = timestamp;
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_TG_BUCKET_STATE) | DBG_FUNC_NONE,
	    thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, SCHED_CLUTCH_STATE_EMPTY, 0, 0);
	if (priority_queue_empty(&root_bucket->scrb_clutch_buckets)) {
		sched_clutch_root_bucket_empty(root_bucket, root_clutch, timestamp);
	}
}
/*
 * sched_clutch_bucket_base_pri()
 *
 * Base priority of a clutch bucket: the maximum base/promoted priority
 * among its threads, plus a boost for above-UI buckets and thread
 * groups not marked low priority.
 * NOTE(review): max_pri + clutch_boost is returned as uint8_t without
 * an overflow check — relies on kernel priorities staying well below
 * UINT8_MAX - boost.
 */
static uint8_t
sched_clutch_bucket_base_pri(
	sched_clutch_bucket_t clutch_bucket)
{
	uint8_t clutch_boost = 0;
	assert(clutch_bucket->scb_runq.count != 0);
	sched_clutch_t clutch = clutch_bucket->scb_clutch;
	/* Highest priority among the threads in this clutch bucket */
	uint8_t max_pri = priority_queue_empty(&clutch_bucket->scb_clutchpri_prioq) ? 0 : priority_queue_max_key(&clutch_bucket->scb_clutchpri_prioq);
	if ((clutch_bucket->scb_bucket == TH_BUCKET_FIXPRI) ||
	    (os_atomic_load(&clutch->sc_tg_priority, relaxed) != SCHED_CLUTCH_TG_PRI_LOW)) {
		clutch_boost = sched_clutch_bucket_pri_boost;
	}
	return max_pri + clutch_boost;
}
/*
 * sched_clutch_bucket_interactivity_score_calculate()
 *
 * Computes the clutch bucket's interactivity score from the ratio of
 * its CPU-blocked to CPU-used time. Buckets that block more than they
 * run score in the upper half [pri, 2*pri]; CPU-bound buckets score in
 * the lower half [0, pri]. Recomputed at most once per
 * sched_clutch_bucket_interactivity_delta.
 */
static uint8_t
sched_clutch_bucket_interactivity_score_calculate(
	sched_clutch_bucket_t clutch_bucket,
	uint64_t timestamp)
{
	if (clutch_bucket->scb_bucket == TH_BUCKET_FIXPRI) {
		/* Above-UI buckets always keep the maximum score */
		assert(clutch_bucket->scb_interactivity_score == (2 * sched_clutch_bucket_interactive_pri));
		return clutch_bucket->scb_interactivity_score;
	}
	if (clutch_bucket->scb_interactivity_ts == 0) {
		/* First calculation: just start the measurement window */
		clutch_bucket->scb_interactivity_ts = timestamp;
		return clutch_bucket->scb_interactivity_score;
	}
	if (timestamp < (clutch_bucket->scb_interactivity_ts + sched_clutch_bucket_interactivity_delta)) {
		/* Rate-limit recalculation; reuse the cached score */
		return clutch_bucket->scb_interactivity_score;
	}
	/* Age the CPU counters before reading them */
	sched_clutch_bucket_cpu_adjust(clutch_bucket);
	clutch_bucket->scb_interactivity_ts = timestamp;
	sched_clutch_bucket_cpu_data_t scb_cpu_data;
	scb_cpu_data.scbcd_cpu_data_packed = os_atomic_load_wide(&clutch_bucket->scb_cpu_data.scbcd_cpu_data_packed, relaxed);
	clutch_cpu_data_t cpu_used = scb_cpu_data.cpu_data.scbcd_cpu_used;
	clutch_cpu_data_t cpu_blocked = scb_cpu_data.cpu_data.scbcd_cpu_blocked;
	if ((cpu_blocked == 0) && (cpu_used == 0)) {
		/* No data yet; keep the existing score */
		return clutch_bucket->scb_interactivity_score;
	}
	uint8_t interactive_score = 0;
	if (cpu_blocked > cpu_used) {
		/* More blocked than used: score in [interactive_pri, 2*interactive_pri] */
		interactive_score = sched_clutch_bucket_interactive_pri +
		    ((sched_clutch_bucket_interactive_pri * (cpu_blocked - cpu_used)) / cpu_blocked);
	} else {
		/* CPU-bound: score in [0, interactive_pri] */
		interactive_score = ((sched_clutch_bucket_interactive_pri * cpu_blocked) / cpu_used);
	}
	clutch_bucket->scb_interactivity_score = interactive_score;
	return interactive_score;
}
/*
 * sched_clutch_bucket_pri_calculate()
 *
 * Computes the clutch bucket priority used for ordering within its
 * root bucket: base priority (highest thread + TG boost) plus the
 * interactivity score. Returns 0 for an empty bucket.
 */
static uint8_t
sched_clutch_bucket_pri_calculate(
	sched_clutch_bucket_t clutch_bucket,
	uint64_t timestamp)
{
	if (clutch_bucket->scb_thr_count == 0) {
		return 0;
	}
	uint8_t base_pri = sched_clutch_bucket_base_pri(clutch_bucket);
	uint8_t interactive_score = sched_clutch_bucket_interactivity_score_calculate(clutch_bucket, timestamp);
	assert(((uint64_t)base_pri + interactive_score) <= UINT8_MAX);
	uint8_t pri = base_pri + interactive_score;
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_TG_BUCKET_PRI) | DBG_FUNC_NONE,
	    thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, pri, interactive_score, 0);
	return pri;
}
/*
 * sched_clutch_root_bucket_highest_clutch_bucket()
 *
 * Returns the highest-priority clutch bucket within a root bucket, or
 * NULL when the root bucket has no runnable clutch buckets.
 */
static sched_clutch_bucket_t
sched_clutch_root_bucket_highest_clutch_bucket(
	sched_clutch_root_bucket_t root_bucket)
{
	return priority_queue_empty(&root_bucket->scrb_clutch_buckets) ? NULL :
	       priority_queue_max(&root_bucket->scrb_clutch_buckets, struct sched_clutch_bucket, scb_pqlink);
}
/*
 * sched_clutch_bucket_runnable()
 *
 * Called when a clutch bucket gains its first thread: accounts blocked
 * time, computes its priority, inserts it into the hierarchy, and
 * refreshes timeshare state. Returns TRUE if this raised the root
 * hierarchy priority (i.e. preemption may be warranted).
 */
static boolean_t
sched_clutch_bucket_runnable(
	sched_clutch_bucket_t clutch_bucket,
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	/* Credit the time this bucket spent blocked before recomputing its score */
	sched_clutch_bucket_cpu_blocked_update(clutch_bucket, timestamp);
	clutch_bucket->scb_priority = sched_clutch_bucket_pri_calculate(clutch_bucket, timestamp);
	sched_clutch_bucket_hierarchy_insert(root_clutch, clutch_bucket, clutch_bucket->scb_bucket, timestamp);
	sched_clutch_bucket_timeshare_update(clutch_bucket);
	int16_t root_old_pri = root_clutch->scr_priority;
	sched_clutch_root_pri_update(root_clutch);
	return root_clutch->scr_priority > root_old_pri;
}
/*
 * sched_clutch_bucket_update()
 *
 * Recomputes a runnable clutch bucket's priority and repositions it in
 * its root bucket's max-heap. Returns TRUE if the root hierarchy
 * priority rose as a result.
 */
static boolean_t
sched_clutch_bucket_update(
	sched_clutch_bucket_t clutch_bucket,
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	uint64_t new_pri = sched_clutch_bucket_pri_calculate(clutch_bucket, timestamp);
	if (new_pri == clutch_bucket->scb_priority) {
		/* No priority change; heap position is still valid */
		return false;
	}
	struct priority_queue *bucket_prioq = &root_clutch->scr_buckets[clutch_bucket->scb_bucket].scrb_clutch_buckets;
	/* Use the direction-specific heap fixup for the key change */
	if (new_pri < clutch_bucket->scb_priority) {
		clutch_bucket->scb_priority = new_pri;
		priority_queue_entry_decrease(bucket_prioq, &clutch_bucket->scb_pqlink,
		    clutch_bucket->scb_priority, PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
	} else {
		clutch_bucket->scb_priority = new_pri;
		priority_queue_entry_increase(bucket_prioq, &clutch_bucket->scb_pqlink,
		    clutch_bucket->scb_priority, PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
	}
	int16_t root_old_pri = root_clutch->scr_priority;
	sched_clutch_root_pri_update(root_clutch);
	return root_clutch->scr_priority > root_old_pri;
}
/*
 * sched_clutch_bucket_empty()
 *
 * Called when a clutch bucket loses its last thread: removes it from
 * the hierarchy, recomputes its (now zero) priority, and refreshes the
 * root hierarchy priority.
 */
static void
sched_clutch_bucket_empty(
	sched_clutch_bucket_t clutch_bucket,
	sched_clutch_root_t root_clutch,
	uint64_t timestamp)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	sched_clutch_bucket_hierarchy_remove(root_clutch, clutch_bucket, clutch_bucket->scb_bucket, timestamp);
	clutch_bucket->scb_priority = sched_clutch_bucket_pri_calculate(clutch_bucket, timestamp);
	sched_clutch_root_pri_update(root_clutch);
}
/*
 * sched_clutch_cpu_usage_update()
 *
 * Credits CPU usage by a thread to its clutch bucket; no-op for
 * threads not managed by the clutch scheduler.
 */
void
sched_clutch_cpu_usage_update(
	thread_t thread,
	uint64_t delta)
{
	if (SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) {
		sched_clutch_t clutch = sched_clutch_for_thread(thread);
		sched_clutch_bucket_cpu_usage_update(&(clutch->sc_clutch_buckets[thread->th_sched_bucket]), delta);
	}
}
/*
 * sched_clutch_bucket_cpu_usage_update()
 *
 * Atomically adds CPU-used time to a clutch bucket's packed counters,
 * saturating the delta at CLUTCH_CPU_DATA_MAX. Above-UI buckets are
 * not scored on CPU usage and are skipped.
 */
static void
sched_clutch_bucket_cpu_usage_update(
	sched_clutch_bucket_t clutch_bucket,
	uint64_t delta)
{
	if (clutch_bucket->scb_bucket == TH_BUCKET_FIXPRI) {
		return;
	}
	clutch_cpu_data_t used_delta = (clutch_cpu_data_t)MIN(delta, CLUTCH_CPU_DATA_MAX);
	os_atomic_add_orig(&(clutch_bucket->scb_cpu_data.cpu_data.scbcd_cpu_used), used_delta, relaxed);
}
/*
 * sched_clutch_bucket_cpu_blocked_update()
 *
 * Accumulates the time a clutch bucket spent with no runnable threads
 * (since scb_blocked_ts), capped at the adjust threshold and at the
 * packed counter maximum. No-op for above-UI buckets or when no
 * blocked timestamp was recorded.
 */
static void
sched_clutch_bucket_cpu_blocked_update(
	sched_clutch_bucket_t clutch_bucket,
	uint64_t timestamp)
{
	if ((clutch_bucket->scb_bucket == TH_BUCKET_FIXPRI) ||
	    (clutch_bucket->scb_blocked_ts == SCHED_CLUTCH_BUCKET_BLOCKED_TS_INVALID)) {
		return;
	}
	uint64_t blocked_time = timestamp - clutch_bucket->scb_blocked_ts;
	/* Cap so a long sleep cannot dominate the interactivity ratio */
	if (blocked_time > sched_clutch_bucket_adjust_threshold) {
		blocked_time = sched_clutch_bucket_adjust_threshold;
	}
	blocked_time = MIN(blocked_time, CLUTCH_CPU_DATA_MAX);
	clutch_cpu_data_t __assert_only cpu_blocked_orig = os_atomic_add_orig(&(clutch_bucket->scb_cpu_data.cpu_data.scbcd_cpu_blocked), (clutch_cpu_data_t)blocked_time, relaxed);
	/* The counter is never supposed to overflow (aged by cpu_adjust) */
	assert(blocked_time <= (CLUTCH_CPU_DATA_MAX - cpu_blocked_orig));
}
/*
 * sched_clutch_bucket_cpu_adjust()
 *
 * Ages a clutch bucket's CPU used/blocked counters: once their sum
 * crosses the adjust threshold, both are scaled down by the adjust
 * ratio via a lock-free CAS loop on the packed 64-bit representation,
 * preserving their ratio while preventing overflow.
 */
static void
sched_clutch_bucket_cpu_adjust(
	sched_clutch_bucket_t clutch_bucket)
{
	sched_clutch_bucket_cpu_data_t old_cpu_data = {};
	sched_clutch_bucket_cpu_data_t new_cpu_data = {};
	do {
		old_cpu_data.scbcd_cpu_data_packed = os_atomic_load_wide(&clutch_bucket->scb_cpu_data.scbcd_cpu_data_packed, relaxed);
		clutch_cpu_data_t cpu_used = old_cpu_data.cpu_data.scbcd_cpu_used;
		clutch_cpu_data_t cpu_blocked = old_cpu_data.cpu_data.scbcd_cpu_blocked;
		if ((cpu_used + cpu_blocked) < sched_clutch_bucket_adjust_threshold) {
			/* Not enough accumulated data to warrant aging */
			return;
		}
		new_cpu_data.cpu_data.scbcd_cpu_used = cpu_used / SCHED_CLUTCH_BUCKET_ADJUST_RATIO;
		new_cpu_data.cpu_data.scbcd_cpu_blocked = cpu_blocked / SCHED_CLUTCH_BUCKET_ADJUST_RATIO;
	} while (!os_atomic_cmpxchg(&clutch_bucket->scb_cpu_data.scbcd_cpu_data_packed, old_cpu_data.scbcd_cpu_data_packed, new_cpu_data.scbcd_cpu_data_packed, relaxed));
}
/*
 * sched_clutch_thread_run_bucket_incr()
 *
 * Increments the run count of the thread's clutch bucket and returns
 * the new count; returns 0 for threads outside the clutch scheduler.
 */
uint32_t
sched_clutch_thread_run_bucket_incr(
	thread_t thread,
	sched_bucket_t bucket)
{
	if (SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) {
		return sched_clutch_run_bucket_incr(sched_clutch_for_thread(thread), bucket);
	}
	return 0;
}
/*
 * sched_clutch_run_bucket_incr()
 *
 * Atomically bumps the run count for the given clutch bucket and
 * returns the updated value.
 */
static uint32_t
sched_clutch_run_bucket_incr(
	sched_clutch_t clutch,
	sched_bucket_t bucket)
{
	assert(bucket != TH_BUCKET_RUN);
	return os_atomic_inc(&(clutch->sc_clutch_buckets[bucket].scb_run_count), relaxed);
}
/*
 * sched_clutch_thread_run_bucket_decr()
 *
 * Decrements the run count of the thread's clutch bucket and returns
 * the new count; returns 0 for threads outside the clutch scheduler.
 */
uint32_t
sched_clutch_thread_run_bucket_decr(
	thread_t thread,
	sched_bucket_t bucket)
{
	if (SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) {
		return sched_clutch_run_bucket_decr(sched_clutch_for_thread(thread), bucket);
	}
	return 0;
}
/*
 * sched_clutch_run_bucket_decr()
 *
 * Atomically drops the run count for the given clutch bucket and
 * returns the updated value.
 */
static uint32_t
sched_clutch_run_bucket_decr(
	sched_clutch_t clutch,
	sched_bucket_t bucket)
{
	assert(bucket != TH_BUCKET_RUN);
	return os_atomic_dec(&(clutch->sc_clutch_buckets[bucket].scb_run_count), relaxed);
}
/*
 * sched_clutch_bucket_timeshare_update()
 *
 * Recomputes the timeshare pri_shift for a clutch bucket at most once
 * per sched_tick, based on the bucket's load (run count spread across
 * available processors). Threads in the bucket decay faster under
 * higher load. Above-UI/FG-adjacent fixed buckets are excluded.
 */
static void
sched_clutch_bucket_timeshare_update(
	sched_clutch_bucket_t clutch_bucket)
{
	if (clutch_bucket->scb_bucket < TH_BUCKET_SHARE_FG) {
		/* Non-timeshare bucket; no decay */
		return;
	}
	uint32_t bucket_sched_ts = os_atomic_load(&clutch_bucket->scb_timeshare_tick, relaxed);
	uint32_t current_sched_ts = sched_tick;
	if (bucket_sched_ts != current_sched_ts) {
		/* First update this tick; recompute load-based shift */
		os_atomic_store(&clutch_bucket->scb_timeshare_tick, current_sched_ts, relaxed);
		uint32_t bucket_load = (os_atomic_load(&clutch_bucket->scb_run_count, relaxed) / processor_avail_count);
		bucket_load = MIN(bucket_load, NRQS - 1);
		uint32_t pri_shift = sched_fixed_shift - sched_load_shifts[bucket_load];
		os_atomic_store(&clutch_bucket->scb_pri_shift, pri_shift, relaxed);
	}
}
/*
 * sched_clutch_thread_clutch_update()
 *
 * Handles a thread moving between clutches (thread group change):
 * settles CPU usage accounting against the old clutch and moves the
 * run-count accounting to the new one. Must be called on the thread
 * itself (asserted).
 */
void
sched_clutch_thread_clutch_update(
	thread_t thread,
	sched_clutch_t old_clutch,
	sched_clutch_t new_clutch)
{
	uint32_t cpu_delta;
	assert(current_thread() == thread);
	if (old_clutch) {
		sched_clutch_run_bucket_decr(old_clutch, thread->th_sched_bucket);
		/* thread_timer_delta() writes the elapsed CPU time into cpu_delta */
		thread_timer_delta(thread, cpu_delta);
		if (thread->pri_shift < INT8_MAX) {
			/* Thread is subject to timeshare decay; charge its usage */
			thread->sched_usage += cpu_delta;
		}
		thread->cpu_delta += cpu_delta;
		sched_clutch_bucket_cpu_usage_update(&(old_clutch->sc_clutch_buckets[thread->th_sched_bucket]), cpu_delta);
	}
	if (new_clutch) {
		sched_clutch_run_bucket_incr(new_clutch, thread->th_sched_bucket);
	}
}
/*
 * sched_clutch_thread_insert()
 *
 * Inserts a thread into the clutch hierarchy: enqueues it on its
 * clutch bucket's run queue and priority queue, updates urgency and
 * thread counts, and makes the clutch bucket runnable (or re-scores
 * it) as needed. Returns TRUE if the insertion raised the root
 * hierarchy priority (preemption may be warranted).
 */
static boolean_t
sched_clutch_thread_insert(
	sched_clutch_root_t root_clutch,
	thread_t thread,
	integer_t options)
{
	boolean_t result = FALSE;
	sched_clutch_hierarchy_locked_assert(root_clutch);
	sched_clutch_t clutch = sched_clutch_for_thread(thread);
	assert(thread->thread_group == clutch->sc_tg);
	uint64_t current_timestamp = mach_absolute_time();
	sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[thread->th_sched_bucket]);
	assert((clutch_bucket->scb_root == NULL) || (clutch_bucket->scb_root == root_clutch));
	run_queue_enqueue(&clutch_bucket->scb_runq, thread, options);
	sched_clutch_root_urgency_inc(root_clutch, thread);
	/* Key the priority queue on sched_pri if the thread is promoted, else base_pri */
	priority_queue_insert(&clutch_bucket->scb_clutchpri_prioq, &thread->sched_clutchpri_link,
	    sched_thread_sched_pri_promoted(thread) ? thread->sched_pri : thread->base_pri,
	    PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
	os_atomic_inc(&clutch->sc_thr_count, relaxed);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_THREAD_STATE) | DBG_FUNC_NONE,
	    thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, thread_tid(thread), SCHED_CLUTCH_STATE_RUNNABLE, 0);
	/*
	 * The thread-count accounting is identical on both paths (it was
	 * previously duplicated in each branch); hoist it and branch only
	 * on whether the bucket was empty before this insertion.
	 */
	boolean_t bucket_was_empty = (clutch_bucket->scb_thr_count == 0);
	sched_clutch_thr_count_inc(&clutch_bucket->scb_thr_count);
	sched_clutch_thr_count_inc(&root_clutch->scr_thr_count);
	if (bucket_was_empty) {
		/* First thread in the clutch bucket; insert it into the hierarchy */
		result = sched_clutch_bucket_runnable(clutch_bucket, root_clutch, current_timestamp);
	} else {
		/* Bucket already runnable; just re-evaluate its priority */
		result = sched_clutch_bucket_update(clutch_bucket, root_clutch, current_timestamp);
	}
	return result;
}
/*
 * sched_clutch_thread_remove()
 *
 * Removes a thread from the clutch hierarchy: dequeues it from its
 * clutch bucket's run queue and priority queue, updates urgency and
 * thread counts, and empties or re-scores the clutch bucket.
 */
static void
sched_clutch_thread_remove(
	sched_clutch_root_t root_clutch,
	thread_t thread,
	uint64_t current_timestamp)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	sched_clutch_t clutch = sched_clutch_for_thread(thread);
	assert(thread->thread_group == clutch->sc_tg);
	assert(thread->runq != PROCESSOR_NULL);
	sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[thread->th_sched_bucket]);
	assert(clutch_bucket->scb_root == root_clutch);
	sched_clutch_root_urgency_dec(root_clutch, thread);
	run_queue_remove(&clutch_bucket->scb_runq, thread);
	priority_queue_remove(&clutch_bucket->scb_clutchpri_prioq, &thread->sched_clutchpri_link,
	    PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_THREAD_STATE) | DBG_FUNC_NONE,
	    thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, thread_tid(thread), SCHED_CLUTCH_STATE_EMPTY, 0);
	os_atomic_dec(&clutch->sc_thr_count, relaxed);
	sched_clutch_thr_count_dec(&root_clutch->scr_thr_count);
	sched_clutch_thr_count_dec(&clutch_bucket->scb_thr_count);
	if (clutch_bucket->scb_thr_count == 0) {
		/* Last thread gone; take the clutch bucket out of the hierarchy */
		sched_clutch_bucket_empty(clutch_bucket, root_clutch, current_timestamp);
	} else {
		sched_clutch_bucket_update(clutch_bucket, root_clutch, current_timestamp);
	}
}
/*
 * sched_clutch_thread_highest()
 *
 * Selects and removes the highest thread in the hierarchy: picks the
 * winning root bucket (EDF/warp/above-UI), its highest clutch bucket,
 * and the top thread of that bucket's run queue. Returns THREAD_NULL
 * when the hierarchy is empty.
 */
static thread_t
sched_clutch_thread_highest(
	sched_clutch_root_t root_clutch)
{
	sched_clutch_hierarchy_locked_assert(root_clutch);
	uint64_t current_timestamp = mach_absolute_time();
	sched_clutch_root_bucket_t root_bucket = sched_clutch_root_highest_root_bucket(root_clutch, current_timestamp);
	if (root_bucket == NULL) {
		return THREAD_NULL;
	}
	/* Push the selected bucket's deadline out since it is being serviced */
	sched_clutch_root_bucket_deadline_update(root_bucket, root_clutch, current_timestamp);
	sched_clutch_bucket_t clutch_bucket = sched_clutch_root_bucket_highest_clutch_bucket(root_bucket);
	assert(clutch_bucket != NULL);
	thread_t thread = run_queue_peek(&clutch_bucket->scb_runq);
	assert(thread != NULL);
	sched_clutch_thread_remove(root_clutch, thread, current_timestamp);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_THREAD_SELECT) | DBG_FUNC_NONE,
	    thread_tid(thread), thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, 0, 0);
	return thread;
}
/*
 * sched_clutch_root_urgency()
 *
 * Returns the number of urgent-priority runnable threads in the hierarchy.
 */
static uint32_t
sched_clutch_root_urgency(
	sched_clutch_root_t root_clutch)
{
	return root_clutch->scr_urgency;
}
/*
 * sched_clutch_root_count_sum()
 *
 * Stats-sum accessor; not maintained by the clutch scheduler, always 0.
 */
static uint32_t
sched_clutch_root_count_sum(
	__unused sched_clutch_root_t root_clutch)
{
	return 0;
}
/*
 * sched_clutch_root_priority()
 *
 * Returns the current root hierarchy priority (NOPRI when empty).
 */
static int
sched_clutch_root_priority(
	sched_clutch_root_t root_clutch)
{
	return root_clutch->scr_priority;
}
/*
 * sched_clutch_root_count()
 *
 * Returns the total number of runnable threads in the hierarchy.
 */
uint32_t
sched_clutch_root_count(
	sched_clutch_root_t root_clutch)
{
	return root_clutch->scr_thr_count;
}
/*
 * sched_clutch_thread_pri_shift()
 *
 * Returns the timeshare decay shift of the thread's clutch bucket;
 * UINT8_MAX (no decay) for threads outside the clutch scheduler.
 */
uint32_t
sched_clutch_thread_pri_shift(
	thread_t thread,
	sched_bucket_t bucket)
{
	if (!SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) {
		return UINT8_MAX;
	}
	assert(bucket != TH_BUCKET_RUN);
	sched_clutch_t clutch = sched_clutch_for_thread(thread);
	return os_atomic_load(&clutch->sc_clutch_buckets[bucket].scb_pri_shift, relaxed);
}
#pragma mark -- Clutch Scheduler Algorithm
/* Forward declarations for the sched_clutch dispatch table entry points */
static void
sched_clutch_init(void);
static void
sched_clutch_timebase_init(void);
static thread_t
sched_clutch_steal_thread(processor_set_t pset);
static void
sched_clutch_thread_update_scan(sched_update_scan_context_t scan_context);
static boolean_t
sched_clutch_processor_enqueue(processor_t processor, thread_t thread,
    sched_options_t options);
static boolean_t
sched_clutch_processor_queue_remove(processor_t processor, thread_t thread);
static ast_t
sched_clutch_processor_csw_check(processor_t processor);
static boolean_t
sched_clutch_processor_queue_has_priority(processor_t processor, int priority, boolean_t gte);
static int
sched_clutch_runq_count(processor_t processor);
static boolean_t
sched_clutch_processor_queue_empty(processor_t processor);
static uint64_t
sched_clutch_runq_stats_count_sum(processor_t processor);
static int
sched_clutch_processor_bound_count(processor_t processor);
static void
sched_clutch_pset_init(processor_set_t pset);
static void
sched_clutch_processor_init(processor_t processor);
static thread_t
sched_clutch_choose_thread(processor_t processor, int priority, ast_t reason);
static void
sched_clutch_processor_queue_shutdown(processor_t processor);
static sched_mode_t
sched_clutch_initial_thread_sched_mode(task_t parent_task);
static uint32_t
sched_clutch_initial_quantum_size(thread_t thread);
static bool
sched_clutch_thread_avoid_processor(processor_t processor, thread_t thread);
static uint32_t
sched_clutch_run_incr(thread_t thread);
static uint32_t
sched_clutch_run_decr(thread_t thread);
static void
sched_clutch_update_thread_bucket(thread_t thread);
/*
 * sched_clutch_dispatch
 *
 * Dispatch table registering the clutch scheduler with the scheduler
 * framework. Entries pointing at sched_clutch_* are the clutch-specific
 * implementations defined in this file; the remaining entries reuse the
 * shared timeshare / global-RT / SMT implementations.
 */
const struct sched_dispatch_table sched_clutch_dispatch = {
	.sched_name                                     = "clutch",
	/* Initialization and per-object setup */
	.init                                           = sched_clutch_init,
	.timebase_init                                  = sched_clutch_timebase_init,
	.processor_init                                 = sched_clutch_processor_init,
	.pset_init                                      = sched_clutch_pset_init,
	.maintenance_continuation                       = sched_timeshare_maintenance_continue,
	/* Thread selection and migration */
	.choose_thread                                  = sched_clutch_choose_thread,
	.steal_thread_enabled                           = sched_steal_thread_enabled,
	.steal_thread                                   = sched_clutch_steal_thread,
	.compute_timeshare_priority                     = sched_compute_timeshare_priority,
	.choose_processor                               = choose_processor,
	/* Run-queue management */
	.processor_enqueue                              = sched_clutch_processor_enqueue,
	.processor_queue_shutdown                       = sched_clutch_processor_queue_shutdown,
	.processor_queue_remove                         = sched_clutch_processor_queue_remove,
	.processor_queue_empty                          = sched_clutch_processor_queue_empty,
	.priority_is_urgent                             = priority_is_urgent,
	.processor_csw_check                            = sched_clutch_processor_csw_check,
	.processor_queue_has_priority                   = sched_clutch_processor_queue_has_priority,
	/* Quantum and priority maintenance (shared timeshare logic) */
	.initial_quantum_size                           = sched_clutch_initial_quantum_size,
	.initial_thread_sched_mode                      = sched_clutch_initial_thread_sched_mode,
	.can_update_priority                            = can_update_priority,
	.update_priority                                = update_priority,
	.lightweight_update_priority                    = lightweight_update_priority,
	.quantum_expire                                 = sched_default_quantum_expire,
	/* Accounting */
	.processor_runq_count                           = sched_clutch_runq_count,
	.processor_runq_stats_count_sum                 = sched_clutch_runq_stats_count_sum,
	.processor_bound_count                          = sched_clutch_processor_bound_count,
	.thread_update_scan                             = sched_clutch_thread_update_scan,
	/* Feature flags */
	.multiple_psets_enabled                         = TRUE,
	.sched_groups_enabled                           = FALSE,
	.avoid_processor_enabled                        = TRUE,
	.thread_avoid_processor                         = sched_clutch_thread_avoid_processor,
	.processor_balance                              = sched_SMT_balance,
	/* Realtime handled by the shared global RT run queue */
	.rt_runq                                        = sched_rtglobal_runq,
	.rt_init                                        = sched_rtglobal_init,
	.rt_queue_shutdown                              = sched_rtglobal_queue_shutdown,
	.rt_runq_scan                                   = sched_rtglobal_runq_scan,
	.rt_runq_count_sum                              = sched_rtglobal_runq_count_sum,
	/* Misc shared policy hooks */
	.qos_max_parallelism                            = sched_qos_max_parallelism,
	.check_spill                                    = sched_check_spill,
	.ipi_policy                                     = sched_ipi_policy,
	.thread_should_yield                            = sched_thread_should_yield,
	.run_count_incr                                 = sched_clutch_run_incr,
	.run_count_decr                                 = sched_clutch_run_decr,
	.update_thread_bucket                           = sched_clutch_update_thread_bucket,
	.pset_made_schedulable                          = sched_pset_made_schedulable,
};
/*
 * sched_clutch_bound_runq()
 *
 * Returns the per-processor run queue used for processor-bound threads.
 */
__attribute__((always_inline))
static inline run_queue_t
sched_clutch_bound_runq(processor_t processor)
{
	run_queue_t bound_runq = &processor->runq;
	return bound_runq;
}
/*
 * sched_clutch_processor_root_clutch()
 *
 * Returns the clutch hierarchy root shared by all processors in this
 * processor's pset.
 */
__attribute__((always_inline))
static inline sched_clutch_root_t
sched_clutch_processor_root_clutch(processor_t processor)
{
	processor_set_t pset = processor->processor_set;
	return &pset->pset_clutch_root;
}
/*
 * sched_clutch_thread_bound_runq()
 *
 * Returns the bound run queue for a thread bound to this processor;
 * asserts the binding actually matches.
 */
__attribute__((always_inline))
static inline run_queue_t
sched_clutch_thread_bound_runq(processor_t processor, __assert_only thread_t thread)
{
	/* A bound thread must be queued only on the processor it is bound to. */
	assert(thread->bound_processor == processor);
	return sched_clutch_bound_runq(processor);
}
/*
 * sched_clutch_initial_quantum_size()
 *
 * Returns the quantum for a thread based on its scheduling bucket;
 * falls back to the standard quantum when no thread is supplied.
 */
static uint32_t
sched_clutch_initial_quantum_size(thread_t thread)
{
	if (thread == THREAD_NULL) {
		return std_quantum;
	}
	uint64_t quantum = sched_clutch_thread_quantum[thread->th_sched_bucket];
	/* The per-bucket quantum table must fit the 32-bit return type. */
	assert(quantum <= UINT32_MAX);
	return (uint32_t)quantum;
}
/*
 * sched_clutch_initial_thread_sched_mode()
 *
 * Kernel-task threads start fixed priority; all other threads start
 * in timeshare mode.
 */
static sched_mode_t
sched_clutch_initial_thread_sched_mode(task_t parent_task)
{
	return (parent_task == kernel_task) ? TH_MODE_FIXED : TH_MODE_TIMESHARE;
}
/*
 * sched_clutch_processor_init()
 *
 * Per-processor initialization: sets up the run queue used for
 * processor-bound threads.
 */
static void
sched_clutch_processor_init(processor_t processor)
{
	run_queue_init(&processor->runq);
}
/*
 * sched_clutch_pset_init()
 *
 * Per-pset initialization: sets up the clutch hierarchy root shared by
 * all processors in the pset.
 */
static void
sched_clutch_pset_init(processor_set_t pset)
{
	sched_clutch_root_init(&pset->pset_clutch_root, pset);
}
/*
 * sched_clutch_init()
 *
 * One-time scheduler initialization. Reads the clutch bucket tunables
 * from boot-args (falling back to compile-time defaults), then runs the
 * common timeshare initialization.
 */
static void
sched_clutch_init(void)
{
	/* Interactive-priority tunable for clutch buckets (boot-arg override). */
	if (!PE_parse_boot_argn("sched_clutch_bucket_interactive_pri", &sched_clutch_bucket_interactive_pri, sizeof(sched_clutch_bucket_interactive_pri))) {
		sched_clutch_bucket_interactive_pri = SCHED_CLUTCH_BUCKET_INTERACTIVE_PRI_DEFAULT;
	}
	/* Priority-boost tunable for clutch buckets (boot-arg override). */
	if (!PE_parse_boot_argn("sched_clutch_bucket_pri_boost", &sched_clutch_bucket_pri_boost, sizeof(sched_clutch_bucket_pri_boost))) {
		sched_clutch_bucket_pri_boost = SCHED_CLUTCH_BUCKET_PRI_BOOST_DEFAULT;
	}
	sched_timeshare_init();
}
/*
 * sched_clutch_timebase_init()
 *
 * Timebase-dependent initialization: converts the microsecond tunable
 * tables (root bucket WCEL/warp values, per-bucket thread quanta) and
 * the bucket adjust/interactivity thresholds into mach absolute-time
 * units. The interactivity delta can be overridden via boot-arg.
 */
static void
sched_clutch_timebase_init(void)
{
	sched_timeshare_timebase_init();
	/* Convert the per-root-bucket and per-thread microsecond tables to abstime. */
	sched_clutch_us_to_abstime(sched_clutch_root_bucket_wcel_us, sched_clutch_root_bucket_wcel);
	sched_clutch_us_to_abstime(sched_clutch_root_bucket_warp_us, sched_clutch_root_bucket_warp);
	sched_clutch_us_to_abstime(sched_clutch_thread_quantum_us, sched_clutch_thread_quantum);
	clock_interval_to_absolutetime_interval(SCHED_CLUTCH_BUCKET_ADJUST_THRESHOLD_USECS,
	    NSEC_PER_USEC, &sched_clutch_bucket_adjust_threshold);
	/* Interactivity delta threshold, overridable via boot-arg. */
	uint32_t interactivity_delta = 0;
	if (!PE_parse_boot_argn("sched_clutch_bucket_interactivity_delta_usecs", &interactivity_delta, sizeof(interactivity_delta))) {
		interactivity_delta = SCHED_CLUTCH_BUCKET_INTERACTIVITY_DELTA_USECS_DEFAULT;
	}
	clock_interval_to_absolutetime_interval(interactivity_delta, NSEC_PER_USEC, &sched_clutch_bucket_interactivity_delta);
}
/*
 * sched_clutch_choose_thread()
 *
 * Chooses the next thread for the processor from either the clutch
 * hierarchy or the processor's bound run queue, whichever holds the
 * higher-priority work at or above the caller's minimum priority.
 * Returns THREAD_NULL if neither queue can beat that minimum.
 */
static thread_t
sched_clutch_choose_thread(
	processor_t processor,
	int priority,
	__unused ast_t reason)
{
	sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor);
	run_queue_t bound_runq = sched_clutch_bound_runq(processor);
	int clutch_pri = sched_clutch_root_priority(pset_clutch_root);
	uint32_t clutch_count = sched_clutch_root_count(pset_clutch_root);

	/* Neither queue reaches the caller's minimum priority. */
	if ((bound_runq->highq < priority) && (clutch_pri < priority)) {
		return THREAD_NULL;
	}
	/* Nothing runnable on either queue. */
	if ((bound_runq->count == 0) && (clutch_count == 0)) {
		return THREAD_NULL;
	}
	/*
	 * Pick the bound runq when it is the only populated queue, or when
	 * both are populated and its highest priority ties or beats the
	 * clutch root priority.
	 */
	boolean_t choose_from_boundq = (bound_runq->count > 0) &&
	    ((clutch_count == 0) || (bound_runq->highq >= clutch_pri));
	if (choose_from_boundq) {
		return run_queue_dequeue(bound_runq, SCHED_HEADQ);
	}
	return sched_clutch_thread_highest(pset_clutch_root);
}
/*
 * sched_clutch_processor_enqueue()
 *
 * Enqueues a thread for execution: clutch-eligible threads go into the
 * pset-wide clutch hierarchy, everything else onto the processor's
 * bound run queue. Returns the insertion result from the underlying
 * queue operation.
 */
static boolean_t
sched_clutch_processor_enqueue(
	processor_t processor,
	thread_t thread,
	sched_options_t options)
{
	thread->runq = processor;
	if (!SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) {
		/* Ineligible threads use the per-processor bound runq. */
		run_queue_t bound_runq = sched_clutch_thread_bound_runq(processor, thread);
		return run_queue_enqueue(bound_runq, thread, options);
	}
	sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor);
	return sched_clutch_thread_insert(pset_clutch_root, thread, options);
}
/*
 * sched_clutch_processor_queue_empty()
 *
 * TRUE only when both the clutch hierarchy and the processor's bound
 * run queue are empty.
 */
static boolean_t
sched_clutch_processor_queue_empty(processor_t processor)
{
	if (sched_clutch_bound_runq(processor)->count != 0) {
		return FALSE;
	}
	return sched_clutch_root_count(sched_clutch_processor_root_clutch(processor)) == 0;
}
/*
 * sched_clutch_processor_csw_check()
 *
 * Decides whether the processor should take a context-switch AST, by
 * comparing the best runnable priority (max of clutch root and bound
 * runq) against the currently running thread. Returns AST_URGENT
 * variants when the current thread should leave this processor or when
 * urgent work is pending.
 */
static ast_t
sched_clutch_processor_csw_check(processor_t processor)
{
	boolean_t has_higher;
	int pri;
	/* If the running thread should not be on this processor at all, preempt urgently. */
	if (sched_clutch_thread_avoid_processor(processor, current_thread())) {
		return AST_PREEMPT | AST_URGENT;
	}
	run_queue_t bound_runq = sched_clutch_bound_runq(processor);
	int clutch_pri = sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor));
	assert(processor->active_thread != NULL);
	pri = MAX(clutch_pri, bound_runq->highq);
	if (processor->first_timeslice) {
		/* Still in the first quantum: only strictly higher priority preempts. */
		has_higher = (pri > processor->current_pri);
	} else {
		/* Quantum used up: an equal priority is enough to round-robin. */
		has_higher = (pri >= processor->current_pri);
	}
	if (has_higher) {
		/* Urgent work on either queue upgrades the preemption to AST_URGENT. */
		if (sched_clutch_root_urgency(sched_clutch_processor_root_clutch(processor)) > 0) {
			return AST_PREEMPT | AST_URGENT;
		}
		if (bound_runq->urgency > 0) {
			return AST_PREEMPT | AST_URGENT;
		}
		return AST_PREEMPT;
	}
	return AST_NONE;
}
/*
 * sched_clutch_processor_queue_has_priority()
 *
 * Checks whether the processor's best runnable priority (max of clutch
 * root and bound runq) meets the given priority; `gte` selects >= vs >.
 */
static boolean_t
sched_clutch_processor_queue_has_priority(processor_t processor,
    int priority,
    boolean_t gte)
{
	int clutch_pri = sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor));
	int bound_pri = sched_clutch_bound_runq(processor)->highq;
	int qpri = MAX(clutch_pri, bound_pri);
	return gte ? (qpri >= priority) : (qpri > priority);
}
/*
 * sched_clutch_runq_count()
 *
 * Total runnable threads visible to this processor: clutch hierarchy
 * count plus the bound runq count.
 */
static int
sched_clutch_runq_count(processor_t processor)
{
	int root_count = (int)sched_clutch_root_count(sched_clutch_processor_root_clutch(processor));
	int bound_count = sched_clutch_bound_runq(processor)->count;
	return root_count + bound_count;
}
/*
 * sched_clutch_runq_stats_count_sum()
 *
 * Sums run-queue statistics for this processor. The pset-shared clutch
 * root contribution is counted only on the lowest-numbered CPU of the
 * pset so it is not double-counted across processors.
 */
static uint64_t
sched_clutch_runq_stats_count_sum(processor_t processor)
{
	uint64_t sum = sched_clutch_bound_runq(processor)->runq_stats.count_sum;
	if (processor->cpu_id == processor->processor_set->cpu_set_low) {
		sum += sched_clutch_root_count_sum(sched_clutch_processor_root_clutch(processor));
	}
	return sum;
}
/*
 * sched_clutch_processor_bound_count()
 *
 * Number of threads waiting on this processor's bound run queue.
 */
static int
sched_clutch_processor_bound_count(processor_t processor)
{
	run_queue_t bound_runq = sched_clutch_bound_runq(processor);
	return bound_runq->count;
}
/*
 * sched_clutch_processor_queue_shutdown()
 *
 * Drains the pset-wide clutch hierarchy as part of processor shutdown.
 * Expects the pset lock held on entry and drops it on every path.
 * Threads are first collected onto a local queue under the lock, then
 * re-run via thread_setrun() after the lock is dropped so they can be
 * placed elsewhere.
 */
static void
sched_clutch_processor_queue_shutdown(processor_t processor)
{
	processor_set_t pset = processor->processor_set;
	sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor);
	thread_t thread;
	queue_head_t tqueue;
	/* Only drain the hierarchy when no processors remain online in the pset. */
	if (pset->online_processor_count > 0) {
		pset_unlock(pset);
		return;
	}
	queue_init(&tqueue);
	/* Pull every thread out of the hierarchy while still holding the pset lock. */
	while (sched_clutch_root_count(pset_clutch_root) > 0) {
		thread = sched_clutch_thread_highest(pset_clutch_root);
		enqueue_tail(&tqueue, &thread->runq_links);
	}
	pset_unlock(pset);
	/* Re-dispatch the collected threads outside the pset lock. */
	qe_foreach_element_safe(thread, &tqueue, runq_links) {
		remqueue(&thread->runq_links);
		thread_lock(thread);
		thread_setrun(thread, SCHED_TAILQ);
		thread_unlock(thread);
	}
}
/*
 * sched_clutch_processor_queue_remove()
 *
 * Attempts to remove a thread from the processor's queues (clutch
 * hierarchy for eligible threads, bound runq otherwise). Returns
 * non-zero iff the thread was still enqueued here and was removed;
 * zero means the thread had already been dequeued and is in the
 * process of being dispatched.
 */
static boolean_t
sched_clutch_processor_queue_remove(
	processor_t processor,
	thread_t thread)
{
	run_queue_t rq;
	processor_set_t pset = processor->processor_set;
	pset_lock(pset);
	if (processor == thread->runq) {
		/* Thread is still enqueued on this processor; pull it off. */
		if (SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) {
			sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor);
			sched_clutch_thread_remove(pset_clutch_root, thread, mach_absolute_time());
		} else {
			rq = sched_clutch_thread_bound_runq(processor, thread);
			run_queue_remove(rq, thread);
		}
	} else {
		/*
		 * The thread left the run queue before we could take the pset
		 * lock; it must now be in the process of being run elsewhere.
		 */
		assert(thread->runq == PROCESSOR_NULL);
		processor = PROCESSOR_NULL;
	}
	pset_unlock(pset);
	return processor != PROCESSOR_NULL;
}
/*
 * sched_clutch_steal_thread()
 *
 * Walks the pset list starting at (and including) the given pset,
 * looking for any pset whose clutch root has runnable threads and
 * stealing its highest one. Expects the starting pset locked on entry;
 * uses lock hand-off while walking and returns with all pset locks
 * dropped. Returns THREAD_NULL if every root is empty.
 */
static thread_t
sched_clutch_steal_thread(processor_set_t pset)
{
	processor_set_t nset, cset = pset;
	thread_t thread;
	do {
		sched_clutch_root_t pset_clutch_root = &cset->pset_clutch_root;
		if (sched_clutch_root_count(pset_clutch_root) > 0) {
			/* Found work: take the highest thread and return with locks dropped. */
			thread = sched_clutch_thread_highest(pset_clutch_root);
			pset_unlock(cset);
			return thread;
		}
		/* Hand the lock off to the next pset in the list. */
		nset = next_pset(cset);
		if (nset != pset) {
			pset_unlock(cset);
			cset = nset;
			pset_lock(cset);
		}
	} while (nset != pset);
	pset_unlock(cset);
	return THREAD_NULL;
}
/*
 * sched_clutch_thread_update_scan()
 *
 * Periodic scheduler scan over all runnable threads, done in two passes:
 * first each processor's bound runq (plus its idle thread), then every
 * clutch bucket runq in each pset's hierarchy. Threads needing updates
 * are batched via runq_scan()/thread_update_add_thread() and processed
 * with thread_update_process_threads(); a pass restarts from the top
 * whenever the batch buffer fills (restart_needed).
 */
static void
sched_clutch_thread_update_scan(sched_update_scan_context_t scan_context)
{
	boolean_t restart_needed = FALSE;
	processor_t processor = processor_list;
	processor_set_t pset;
	thread_t thread;
	spl_t s;
	/* Pass 1: scan the bound runq and idle thread of every processor. */
	do {
		do {
			pset = processor->processor_set;
			s = splsched();
			pset_lock(pset);
			restart_needed = runq_scan(sched_clutch_bound_runq(processor), scan_context);
			pset_unlock(pset);
			splx(s);
			if (restart_needed) {
				break;
			}
			/* Idle threads are not on any runq; batch them if their stamp is stale. */
			thread = processor->idle_thread;
			if (thread != THREAD_NULL && thread->sched_stamp != sched_tick) {
				if (thread_update_add_thread(thread) == FALSE) {
					restart_needed = TRUE;
					break;
				}
			}
		} while ((processor = processor->processor_list) != NULL);
		/* Drain the batched threads; then restart the scan if it was cut short. */
		thread_update_process_threads();
	} while (restart_needed);
	/* Pass 2: scan every clutch bucket runq in each pset's hierarchy. */
	pset = &pset0;
	do {
		do {
			s = splsched();
			pset_lock(pset);
			if (sched_clutch_root_count(&pset->pset_clutch_root) > 0) {
				queue_t clutch_bucket_list = &pset->pset_clutch_root.scr_clutch_buckets;
				sched_clutch_bucket_t clutch_bucket;
				qe_foreach_element(clutch_bucket, clutch_bucket_list, scb_listlink) {
					sched_clutch_bucket_timeshare_update(clutch_bucket);
					restart_needed = runq_scan(&clutch_bucket->scb_runq, scan_context);
					if (restart_needed) {
						break;
					}
				}
			}
			pset_unlock(pset);
			splx(s);
			if (restart_needed) {
				break;
			}
		} while ((pset = pset->pset_list) != NULL);
		thread_update_process_threads();
	} while (restart_needed);
}
extern int sched_allow_rt_smt;
/*
 * sched_clutch_thread_avoid_processor()
 *
 * Returns true when the thread should not run on this processor: the
 * processor is an SMT secondary whose primary is running a realtime
 * thread, and the candidate is either not realtime itself or RT-on-SMT
 * is disabled.
 */
static bool
sched_clutch_thread_avoid_processor(processor_t processor, thread_t thread)
{
	if (processor->processor_primary == processor) {
		/* Primary processors are never avoided by this policy. */
		return false;
	}
	if (processor->processor_primary->current_pri < BASEPRI_RTQUEUES) {
		/* Primary sibling is not running realtime work. */
		return false;
	}
	return (thread->sched_pri < BASEPRI_RTQUEUES) || !sched_allow_rt_smt;
}
/*
 * sched_clutch_run_incr()
 *
 * Increments the global run-bucket count and the thread's per-bucket
 * run count. The thread must be runnable (TH_RUN set, TH_IDLE clear).
 * Returns the updated global count.
 */
static uint32_t
sched_clutch_run_incr(thread_t thread)
{
	assert((thread->state & (TH_RUN | TH_IDLE)) == TH_RUN);
	uint32_t updated_count = os_atomic_inc(&sched_run_buckets[TH_BUCKET_RUN], relaxed);
	sched_clutch_thread_run_bucket_incr(thread, thread->th_sched_bucket);
	return updated_count;
}
/*
 * sched_clutch_run_decr()
 *
 * Decrements the global run-bucket count and the thread's per-bucket
 * run count. The thread must no longer be purely runnable.
 * Returns the updated global count.
 */
static uint32_t
sched_clutch_run_decr(thread_t thread)
{
	assert((thread->state & (TH_RUN | TH_IDLE)) != TH_RUN);
	uint32_t updated_count = os_atomic_dec(&sched_run_buckets[TH_BUCKET_RUN], relaxed);
	sched_clutch_thread_run_bucket_decr(thread, thread->th_sched_bucket);
	return updated_count;
}
/*
 * sched_convert_pri_to_bucket()
 *
 * Maps a priority to its timeshare scheduling bucket by comparing it
 * against the standard base-priority thresholds (highest band first).
 */
static sched_bucket_t
sched_convert_pri_to_bucket(uint8_t priority)
{
	if (priority > BASEPRI_USER_INITIATED) {
		return TH_BUCKET_SHARE_FG;
	}
	if (priority > BASEPRI_DEFAULT) {
		return TH_BUCKET_SHARE_IN;
	}
	if (priority > BASEPRI_UTILITY) {
		return TH_BUCKET_SHARE_DF;
	}
	if (priority > MAXPRI_THROTTLE) {
		return TH_BUCKET_SHARE_UT;
	}
	return TH_BUCKET_SHARE_BG;
}
/*
 * sched_thread_sched_pri_promoted()
 *
 * TRUE when the thread's effective scheduling priority differs from its
 * base priority due to promotion, demotion, depression, or a kernel
 * promotion schedpri.
 */
static boolean_t
sched_thread_sched_pri_promoted(thread_t thread)
{
	const uint32_t adjusted_pri_flags = TH_SFLAG_PROMOTED |
	    TH_SFLAG_PROMOTE_REASON_MASK |
	    TH_SFLAG_DEMOTED_MASK |
	    TH_SFLAG_DEPRESSED_MASK;
	return ((thread->sched_flags & adjusted_pri_flags) != 0) ||
	       (thread->kern_promotion_schedpri != 0);
}
/*
 * sched_clutch_update_thread_bucket()
 *
 * Re-evaluates which scheduling bucket a thread belongs to based on its
 * scheduling mode and effective priority, and updates the thread's
 * bucket, pri_shift, and (if the thread is currently runnable) the
 * per-bucket run counts. Must be called with the thread off any run
 * queue; the subsequent re-insert places it on the new bucket's runq.
 *
 * Note: defined `static` to match the forward declaration earlier in
 * this file — the definition previously omitted the storage class,
 * which was inconsistent (though the identifier already had internal
 * linkage per C11 6.2.2).
 */
static void
sched_clutch_update_thread_bucket(thread_t thread)
{
	sched_bucket_t old_bucket = thread->th_sched_bucket;
	sched_bucket_t new_bucket = TH_BUCKET_RUN;
	assert(thread->runq == PROCESSOR_NULL);
	/* Promoted/demoted/depressed threads bucket on sched_pri; otherwise base_pri. */
	int pri = (sched_thread_sched_pri_promoted(thread)) ? thread->sched_pri : thread->base_pri;
	switch (thread->sched_mode) {
	case TH_MODE_FIXED:
		if (pri >= BASEPRI_FOREGROUND) {
			new_bucket = TH_BUCKET_FIXPRI;
		} else {
			/* Low-priority fixed threads share the timeshare buckets. */
			new_bucket = sched_convert_pri_to_bucket(pri);
		}
		break;
	case TH_MODE_REALTIME:
		new_bucket = TH_BUCKET_FIXPRI;
		break;
	case TH_MODE_TIMESHARE:
		new_bucket = sched_convert_pri_to_bucket(pri);
		break;
	default:
		panic("unexpected mode: %d", thread->sched_mode);
		break;
	}
	if (old_bucket == new_bucket) {
		return;
	}
	thread->th_sched_bucket = new_bucket;
	thread->pri_shift = sched_clutch_thread_pri_shift(thread, new_bucket);
	/*
	 * The thread is off the runq here, so only the run counts need to
	 * move between buckets; re-insertion will queue it correctly.
	 */
	if ((thread->state & (TH_RUN | TH_IDLE)) == TH_RUN) {
		sched_clutch_thread_run_bucket_decr(thread, old_bucket);
		sched_clutch_thread_run_bucket_incr(thread, new_bucket);
	}
}
#endif