#include <debug.h>
#include <mach_kdb.h>
#include <ddb/db_output.h>
#include <mach/mach_types.h>
#include <mach/machine.h>
#include <mach/policy.h>
#include <mach/sync_policy.h>
#include <machine/machine_routines.h>
#include <machine/sched_param.h>
#include <machine/machine_cpu.h>
#include <kern/kern_types.h>
#include <kern/clock.h>
#include <kern/counters.h>
#include <kern/cpu_number.h>
#include <kern/cpu_data.h>
#include <kern/debug.h>
#include <kern/lock.h>
#include <kern/macro_help.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/syscall_subr.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/wait_queue.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <mach/sdt.h>
#include <sys/kdebug.h>
#include <kern/pms.h>
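/*
 *	Global scheduler state: the real-time run queue shared by all
 *	processors (protected by rt_lock), plus the timeslicing and
 *	safety-release tunables initialized by sched_init() and
 *	sched_timebase_init() below.
 */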
struct run_queue rt_runq;
#define RT_RUNQ ((processor_t)-1)
decl_simple_lock_data(static,rt_lock);
#define DEFAULT_PREEMPTION_RATE 100
int default_preemption_rate = DEFAULT_PREEMPTION_RATE;
#define MAX_UNSAFE_QUANTA 800
int max_unsafe_quanta = MAX_UNSAFE_QUANTA;
#define MAX_POLL_QUANTA 2
int max_poll_quanta = MAX_POLL_QUANTA;
#define SCHED_POLL_YIELD_SHIFT 4
int sched_poll_yield_shift = SCHED_POLL_YIELD_SHIFT;
uint64_t max_unsafe_computation;
uint32_t sched_safe_duration;
uint64_t max_poll_computation;
uint32_t std_quantum;
uint32_t min_std_quantum;
uint32_t std_quantum_us;
uint32_t max_rt_quantum;
uint32_t min_rt_quantum;
uint32_t sched_cswtime;
unsigned sched_tick;
uint32_t sched_tick_interval;
uint32_t sched_pri_shift = INT8_MAX;
uint32_t sched_fixed_shift;
uint32_t sched_run_count, sched_share_count;
uint32_t sched_load_average, sched_mach_factor;
static void load_shift_init(void) __attribute__((section("__TEXT, initcode")));
static void preempt_pri_init(void) __attribute__((section("__TEXT, initcode")));
static thread_t run_queue_dequeue(
run_queue_t runq,
integer_t options);
static thread_t choose_thread(
processor_t processor,
int priority);
static thread_t thread_select_idle(
thread_t thread,
processor_t processor);
static thread_t processor_idle(
thread_t thread,
processor_t processor);
static thread_t steal_thread(
processor_set_t pset);
static thread_t steal_processor_thread(
processor_t processor);
static void thread_update_scan(void);
#if DEBUG
extern int debug_task;
#define TLOG(a, fmt, args...) do { if (debug_task & (a)) kprintf(fmt, ## args); } while (0)
#else
#define TLOG(a, fmt, args...) do {} while (0)
#endif
#if DEBUG
static
boolean_t thread_runnable(
thread_t thread);
#endif
int8_t sched_load_shifts[NRQS];
int sched_preempt_pri[NRQBM];
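/*
 *	sched_init:
 *
 *	Compute the standard timeslicing quantum from the preemption
 *	rate, build the load-shift and preemption-priority tables,
 *	and initialize the real-time run queue, its lock, and the
 *	AST machinery.
 */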
void
sched_init(void)
{
if (default_preemption_rate < 1)
default_preemption_rate = DEFAULT_PREEMPTION_RATE;
std_quantum_us = (1000 * 1000) / default_preemption_rate;
printf("standard timeslicing quantum is %d us\n", std_quantum_us);
sched_safe_duration = (2 * max_unsafe_quanta / default_preemption_rate) *
(1 << SCHED_TICK_SHIFT);
load_shift_init();
preempt_pri_init();
simple_lock_init(&rt_lock, 0);
run_queue_init(&rt_runq);
sched_tick = 0;
ast_init();
}
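/*
 *	sched_timebase_init:
 *
 *	Once the timebase is known, convert the quantum and scheduler
 *	tick intervals into absolute-time units and derive
 *	sched_fixed_shift, the fixed-point shift used by the load
 *	average calculations.
 */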
void
sched_timebase_init(void)
{
uint64_t abstime;
uint32_t shift;
clock_interval_to_absolutetime_interval(
std_quantum_us, NSEC_PER_USEC, &abstime);
assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
std_quantum = (uint32_t)abstime;
clock_interval_to_absolutetime_interval(250, NSEC_PER_USEC, &abstime);
assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
min_std_quantum = (uint32_t)abstime;
clock_interval_to_absolutetime_interval(50, NSEC_PER_USEC, &abstime);
assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
min_rt_quantum = (uint32_t)abstime;
clock_interval_to_absolutetime_interval(
50, 1000*NSEC_PER_USEC, &abstime);
assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
max_rt_quantum = (uint32_t)abstime;
clock_interval_to_absolutetime_interval(USEC_PER_SEC >> SCHED_TICK_SHIFT,
NSEC_PER_USEC, &abstime);
assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
sched_tick_interval = (uint32_t)abstime;
abstime = (abstime * 5) / 3;
for (shift = 0; abstime > BASEPRI_DEFAULT; ++shift)
abstime >>= 1;
sched_fixed_shift = shift;
max_unsafe_computation = max_unsafe_quanta * std_quantum;
max_poll_computation = max_poll_quanta * std_quantum;
}
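/*
 *	load_shift_init:
 *
 *	Fill sched_load_shifts[] so that entry i holds floor(log2(i))
 *	(entry 0 is INT8_MIN); the table is indexed by the timeshare
 *	run count to scale usage-based priority decay.
 */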
static void
load_shift_init(void)
{
int8_t k, *p = sched_load_shifts;
uint32_t i, j;
*p++ = INT8_MIN; *p++ = 0;
for (i = j = 2, k = 1; i < NRQS; ++k) {
for (j <<= 1; i < j; ++i)
*p++ = k;
}
}
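/*
 *	preempt_pri_init:
 *
 *	Mark the priority levels that warrant an urgent preemption:
 *	the band between BASEPRI_FOREGROUND and MINPRI_KERNEL, and
 *	everything at or above BASEPRI_PREEMPT.
 */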
static void
preempt_pri_init(void)
{
int i, *p = sched_preempt_pri;
for (i = BASEPRI_FOREGROUND + 1; i < MINPRI_KERNEL; ++i)
setbit(i, p);
for (i = BASEPRI_PREEMPT; i <= MAXPRI; ++i)
setbit(i, p);
}
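/*
 *	thread_timer_expire:
 *
 *	Thread wait timer expiration; if the timer is still armed,
 *	clear the wait with THREAD_TIMED_OUT.
 */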
void
thread_timer_expire(
void *p0,
__unused void *p1)
{
thread_t thread = p0;
spl_t s;
s = splsched();
thread_lock(thread);
if (--thread->wait_timer_active == 0) {
if (thread->wait_timer_is_set) {
thread->wait_timer_is_set = FALSE;
clear_wait_internal(thread, THREAD_TIMED_OUT);
}
}
thread_unlock(thread);
splx(s);
}
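/*
 *	Legacy (non-LP64) wait timer interfaces: arm or cancel the
 *	current thread's wait timer while it is in a wait state.
 */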
#ifndef __LP64__
void
thread_set_timer(
uint32_t interval,
uint32_t scale_factor)
{
thread_t thread = current_thread();
uint64_t deadline;
spl_t s;
s = splsched();
thread_lock(thread);
if ((thread->state & TH_WAIT) != 0) {
clock_interval_to_deadline(interval, scale_factor, &deadline);
if (!timer_call_enter(&thread->wait_timer, deadline))
thread->wait_timer_active++;
thread->wait_timer_is_set = TRUE;
}
thread_unlock(thread);
splx(s);
}
void
thread_set_timer_deadline(
uint64_t deadline)
{
thread_t thread = current_thread();
spl_t s;
s = splsched();
thread_lock(thread);
if ((thread->state & TH_WAIT) != 0) {
if (!timer_call_enter(&thread->wait_timer, deadline))
thread->wait_timer_active++;
thread->wait_timer_is_set = TRUE;
}
thread_unlock(thread);
splx(s);
}
void
thread_cancel_timer(void)
{
thread_t thread = current_thread();
spl_t s;
s = splsched();
thread_lock(thread);
if (thread->wait_timer_is_set) {
if (timer_call_cancel(&thread->wait_timer))
thread->wait_timer_active--;
thread->wait_timer_is_set = FALSE;
}
thread_unlock(thread);
splx(s);
}
#endif
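/*
 *	thread_unblock:
 *
 *	Unblock thread on wake up.
 *
 *	Returns TRUE if the thread is still running.
 *
 *	Thread must be locked.
 */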
boolean_t
thread_unblock(
thread_t thread,
wait_result_t wresult)
{
boolean_t result = FALSE;
thread->wait_result = wresult;
if (thread->wait_timer_is_set) {
if (timer_call_cancel(&thread->wait_timer))
thread->wait_timer_active--;
thread->wait_timer_is_set = FALSE;
}
thread->state &= ~(TH_WAIT|TH_UNINT);
if (!(thread->state & TH_RUN)) {
thread->state |= TH_RUN;
(*thread->sched_call)(SCHED_CALL_UNBLOCK, thread);
sched_run_incr();
if (thread->sched_mode & TH_MODE_TIMESHARE)
sched_share_incr();
}
else {
if (thread->state & TH_IDLE) {
processor_t processor = thread->last_processor;
if (processor != current_processor())
machine_signal_idle(processor);
}
result = TRUE;
}
if (thread->sched_mode & TH_MODE_REALTIME) {
thread->realtime.deadline = mach_absolute_time();
thread->realtime.deadline += thread->realtime.constraint;
}
thread->current_quantum = 0;
thread->computation_metered = 0;
thread->reason = AST_NONE;
KERNEL_DEBUG_CONSTANT(
MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE,
(uintptr_t)thread_tid(thread), thread->sched_pri, 0, 0, 0);
DTRACE_SCHED2(wakeup, struct thread *, thread, struct proc *, thread->task->bsd_info);
return (result);
}
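/*
 *	thread_go:
 *
 *	Unblock and dispatch thread.
 *
 *	Thread must be locked and already pulled from its wait queue.
 *	Returns KERN_SUCCESS if the thread was waiting,
 *	KERN_NOT_WAITING otherwise.
 */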
kern_return_t
thread_go(
thread_t thread,
wait_result_t wresult)
{
assert(thread->at_safe_point == FALSE);
assert(thread->wait_event == NO_EVENT64);
assert(thread->wait_queue == WAIT_QUEUE_NULL);
if ((thread->state & (TH_WAIT|TH_TERMINATE)) == TH_WAIT) {
if (!thread_unblock(thread, wresult))
thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
return (KERN_SUCCESS);
}
return (KERN_NOT_WAITING);
}
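/*
 *	thread_mark_wait_locked:
 *
 *	Mark the current thread as waiting, honoring the requested
 *	interruptible level as limited by the thread's interrupt mask
 *	and abort state.  Returns THREAD_WAITING or THREAD_INTERRUPTED.
 *
 *	Thread must be locked.
 */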
__private_extern__
wait_result_t
thread_mark_wait_locked(
thread_t thread,
wait_interrupt_t interruptible)
{
boolean_t at_safe_point;
assert(thread == current_thread());
if (interruptible > (thread->options & TH_OPT_INTMASK))
interruptible = thread->options & TH_OPT_INTMASK;
at_safe_point = (interruptible == THREAD_ABORTSAFE);
if ( interruptible == THREAD_UNINT ||
!(thread->sched_mode & TH_MODE_ABORT) ||
(!at_safe_point &&
(thread->sched_mode & TH_MODE_ABORTSAFELY))) {
DTRACE_SCHED(sleep);
thread->state |= (interruptible) ? TH_WAIT : (TH_WAIT | TH_UNINT);
thread->at_safe_point = at_safe_point;
return (thread->wait_result = THREAD_WAITING);
}
else
if (thread->sched_mode & TH_MODE_ABORTSAFELY)
thread->sched_mode &= ~TH_MODE_ISABORTED;
return (thread->wait_result = THREAD_INTERRUPTED);
}
__private_extern__
wait_interrupt_t
thread_interrupt_level(
wait_interrupt_t new_level)
{
thread_t thread = current_thread();
wait_interrupt_t result = thread->options & TH_OPT_INTMASK;
thread->options = (thread->options & ~TH_OPT_INTMASK) | (new_level & TH_OPT_INTMASK);
return result;
}
boolean_t
assert_wait_possible(void)
{
thread_t thread;
#if DEBUG
if(debug_mode) return TRUE;
#endif
thread = current_thread();
return (thread == NULL || wait_queue_assert_possible(thread));
}
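/*
 *	assert_wait:
 *
 *	Assert that the current thread is about to go to sleep
 *	on the specified event, hashing the event to a global
 *	wait queue.
 */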
wait_result_t
assert_wait(
event_t event,
wait_interrupt_t interruptible)
{
register wait_queue_t wq;
register int index;
assert(event != NO_EVENT);
index = wait_hash(event);
wq = &wait_queues[index];
return wait_queue_assert_wait(wq, event, interruptible, 0);
}
wait_result_t
assert_wait_timeout(
event_t event,
wait_interrupt_t interruptible,
uint32_t interval,
uint32_t scale_factor)
{
thread_t thread = current_thread();
wait_result_t wresult;
wait_queue_t wqueue;
uint64_t deadline;
spl_t s;
assert(event != NO_EVENT);
wqueue = &wait_queues[wait_hash(event)];
s = splsched();
wait_queue_lock(wqueue);
thread_lock(thread);
clock_interval_to_deadline(interval, scale_factor, &deadline);
wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t, event),
interruptible, deadline, thread);
thread_unlock(thread);
wait_queue_unlock(wqueue);
splx(s);
return (wresult);
}
wait_result_t
assert_wait_deadline(
event_t event,
wait_interrupt_t interruptible,
uint64_t deadline)
{
thread_t thread = current_thread();
wait_result_t wresult;
wait_queue_t wqueue;
spl_t s;
assert(event != NO_EVENT);
wqueue = &wait_queues[wait_hash(event)];
s = splsched();
wait_queue_lock(wqueue);
thread_lock(thread);
wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t,event),
interruptible, deadline, thread);
thread_unlock(thread);
wait_queue_unlock(wqueue);
splx(s);
return (wresult);
}
__private_extern__ wait_result_t
thread_sleep_fast_usimple_lock(
event_t event,
simple_lock_t lock,
wait_interrupt_t interruptible)
{
wait_result_t res;
res = assert_wait(event, interruptible);
if (res == THREAD_WAITING) {
simple_unlock(lock);
res = thread_block(THREAD_CONTINUE_NULL);
simple_lock(lock);
}
return res;
}
wait_result_t
thread_sleep_usimple_lock(
event_t event,
usimple_lock_t lock,
wait_interrupt_t interruptible)
{
wait_result_t res;
res = assert_wait(event, interruptible);
if (res == THREAD_WAITING) {
usimple_unlock(lock);
res = thread_block(THREAD_CONTINUE_NULL);
usimple_lock(lock);
}
return res;
}
wait_result_t
thread_sleep_lock_write(
event_t event,
lock_t *lock,
wait_interrupt_t interruptible)
{
wait_result_t res;
res = assert_wait(event, interruptible);
if (res == THREAD_WAITING) {
lock_write_done(lock);
res = thread_block(THREAD_CONTINUE_NULL);
lock_write(lock);
}
return res;
}
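/*
 *	thread_stop:
 *
 *	Force a preemption point for a thread and wait for it to stop
 *	running, arbitrating among multiple stop requests via TH_SUSP.
 *	Returns FALSE if the wait was interrupted.
 */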
boolean_t
thread_stop(
thread_t thread)
{
wait_result_t wresult;
spl_t s = splsched();
wake_lock(thread);
thread_lock(thread);
while (thread->state & TH_SUSP) {
thread->wake_active = TRUE;
thread_unlock(thread);
wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
wake_unlock(thread);
splx(s);
if (wresult == THREAD_WAITING)
wresult = thread_block(THREAD_CONTINUE_NULL);
if (wresult != THREAD_AWAKENED)
return (FALSE);
s = splsched();
wake_lock(thread);
thread_lock(thread);
}
thread->state |= TH_SUSP;
while (thread->state & TH_RUN) {
processor_t processor = thread->last_processor;
if (processor != PROCESSOR_NULL && processor->active_thread == thread)
cause_ast_check(processor);
thread->wake_active = TRUE;
thread_unlock(thread);
wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
wake_unlock(thread);
splx(s);
if (wresult == THREAD_WAITING)
wresult = thread_block(THREAD_CONTINUE_NULL);
if (wresult != THREAD_AWAKENED) {
thread_unstop(thread);
return (FALSE);
}
s = splsched();
wake_lock(thread);
thread_lock(thread);
}
thread_unlock(thread);
wake_unlock(thread);
splx(s);
return (TRUE);
}
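/*
 *	thread_unstop:
 *
 *	Release a previous stop request, setting the thread running
 *	again if appropriate.  Use only after a successful thread_stop().
 */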
void
thread_unstop(
thread_t thread)
{
spl_t s = splsched();
wake_lock(thread);
thread_lock(thread);
if ((thread->state & (TH_RUN|TH_WAIT|TH_SUSP)) == TH_SUSP) {
thread->state &= ~TH_SUSP;
thread_unblock(thread, THREAD_AWAKENED);
thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
}
else
if (thread->state & TH_SUSP) {
thread->state &= ~TH_SUSP;
if (thread->wake_active) {
thread->wake_active = FALSE;
thread_unlock(thread);
thread_wakeup(&thread->wake_active);
wake_unlock(thread);
splx(s);
return;
}
}
thread_unlock(thread);
wake_unlock(thread);
splx(s);
}
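/*
 *	thread_wait:
 *
 *	Wait (uninterruptibly) for a thread to stop running (TH_RUN
 *	cleared), nudging its processor with an AST check as needed.
 */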
void
thread_wait(
thread_t thread)
{
wait_result_t wresult;
spl_t s = splsched();
wake_lock(thread);
thread_lock(thread);
while (thread->state & TH_RUN) {
processor_t processor = thread->last_processor;
if (processor != PROCESSOR_NULL && processor->active_thread == thread)
cause_ast_check(processor);
thread->wake_active = TRUE;
thread_unlock(thread);
wresult = assert_wait(&thread->wake_active, THREAD_UNINT);
wake_unlock(thread);
splx(s);
if (wresult == THREAD_WAITING)
thread_block(THREAD_CONTINUE_NULL);
s = splsched();
wake_lock(thread);
thread_lock(thread);
}
thread_unlock(thread);
wake_unlock(thread);
splx(s);
}
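/*
 *	clear_wait_internal:
 *
 *	Clear the wait condition for the specified thread, pulling it
 *	from its wait queue if necessary; retries briefly when the
 *	wait queue lock cannot be taken.
 *
 *	Called with the thread locked.  Returns the result of
 *	thread_go(), or KERN_FAILURE / KERN_NOT_WAITING.
 */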
__private_extern__ kern_return_t
clear_wait_internal(
thread_t thread,
wait_result_t wresult)
{
wait_queue_t wq = thread->wait_queue;
int i = LockTimeOut;
do {
if (wresult == THREAD_INTERRUPTED && (thread->state & TH_UNINT))
return (KERN_FAILURE);
if (wq != WAIT_QUEUE_NULL) {
if (wait_queue_lock_try(wq)) {
wait_queue_pull_thread_locked(wq, thread, TRUE);
}
else {
thread_unlock(thread);
delay(1);
thread_lock(thread);
if (wq != thread->wait_queue)
return (KERN_NOT_WAITING);
continue;
}
}
return (thread_go(thread, wresult));
} while (--i > 0);
panic("clear_wait_internal: deadlock: thread=%p, wq=%p, cpu=%d\n",
thread, wq, cpu_number());
return (KERN_FAILURE);
}
kern_return_t
clear_wait(
thread_t thread,
wait_result_t result)
{
kern_return_t ret;
spl_t s;
s = splsched();
thread_lock(thread);
ret = clear_wait_internal(thread, result);
thread_unlock(thread);
splx(s);
return ret;
}
kern_return_t
thread_wakeup_prim(
event_t event,
boolean_t one_thread,
wait_result_t result)
{
register wait_queue_t wq;
register int index;
index = wait_hash(event);
wq = &wait_queues[index];
if (one_thread)
return (wait_queue_wakeup_one(wq, event, result));
else
return (wait_queue_wakeup_all(wq, event, result));
}
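/*
 *	thread_bind:
 *
 *	Force the current thread to execute on the specified processor.
 *	Returns the previous binding.
 */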
processor_t
thread_bind(
processor_t processor)
{
thread_t self = current_thread();
processor_t prev;
spl_t s;
s = splsched();
thread_lock(self);
prev = self->bound_processor;
self->bound_processor = processor;
thread_unlock(self);
splx(s);
return (prev);
}
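/*
 *	thread_select:
 *
 *	Select a new thread for the processor to execute, considering
 *	the real-time run queue, the processor-local run queue, and
 *	thread stealing from other processors in the set.  May decide
 *	to continue running the current thread, or hand back the
 *	processor's idle thread.
 *
 *	May select the current thread, which must be locked.
 */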
static thread_t
thread_select(
thread_t thread,
processor_t processor)
{
processor_set_t pset = processor->processor_set;
thread_t new_thread = THREAD_NULL;
boolean_t inactive_state;
do {
if (thread->sched_stamp != sched_tick)
update_priority(thread);
processor->current_pri = thread->sched_pri;
pset_lock(pset);
inactive_state = processor->state != PROCESSOR_SHUTDOWN && machine_cpu_is_inactive(processor->cpu_id);
simple_lock(&rt_lock);
if ( thread->state == TH_RUN &&
(thread->sched_pri >= BASEPRI_RTQUEUES ||
processor->processor_meta == PROCESSOR_META_NULL ||
processor->processor_meta->primary == processor) &&
(thread->bound_processor == PROCESSOR_NULL ||
thread->bound_processor == processor) &&
(thread->affinity_set == AFFINITY_SET_NULL ||
thread->affinity_set->aset_pset == pset) ) {
if ( thread->sched_pri >= BASEPRI_RTQUEUES &&
first_timeslice(processor) ) {
if (rt_runq.highq >= BASEPRI_RTQUEUES) {
register run_queue_t runq = &rt_runq;
register queue_t q;
q = runq->queues + runq->highq;
if (((thread_t)q->next)->realtime.deadline <
processor->deadline) {
thread = (thread_t)q->next;
((queue_entry_t)thread)->next->prev = q;
q->next = ((queue_entry_t)thread)->next;
thread->runq = PROCESSOR_NULL;
runq->count--; runq->urgency--;
assert(runq->urgency >= 0);
if (queue_empty(q)) {
if (runq->highq != IDLEPRI)
clrbit(MAXPRI - runq->highq, runq->bitmap);
runq->highq = MAXPRI - ffsbit(runq->bitmap);
}
}
}
simple_unlock(&rt_lock);
processor->deadline = thread->realtime.deadline;
pset_unlock(pset);
return (thread);
}
if (!inactive_state && rt_runq.highq < thread->sched_pri &&
(new_thread = choose_thread(processor, thread->sched_pri)) == THREAD_NULL) {
simple_unlock(&rt_lock);
pset_pri_hint(pset, processor, processor->current_pri);
pset_count_hint(pset, processor, processor->runq.count);
processor->deadline = UINT64_MAX;
pset_unlock(pset);
return (thread);
}
}
if (new_thread != THREAD_NULL ||
(processor->runq.highq >= rt_runq.highq &&
(new_thread = choose_thread(processor, MINPRI)) != THREAD_NULL)) {
simple_unlock(&rt_lock);
if (!inactive_state) {
pset_pri_hint(pset, processor, new_thread->sched_pri);
pset_count_hint(pset, processor, processor->runq.count);
}
processor->deadline = UINT64_MAX;
pset_unlock(pset);
return (new_thread);
}
if (rt_runq.count > 0) {
thread = run_queue_dequeue(&rt_runq, SCHED_HEADQ);
simple_unlock(&rt_lock);
processor->deadline = thread->realtime.deadline;
pset_unlock(pset);
return (thread);
}
simple_unlock(&rt_lock);
processor->deadline = UINT64_MAX;
if (inactive_state) {
if (processor->state == PROCESSOR_RUNNING)
remqueue(&pset->active_queue, (queue_entry_t)processor);
else
if (processor->state == PROCESSOR_IDLE)
remqueue(&pset->idle_queue, (queue_entry_t)processor);
processor->state = PROCESSOR_INACTIVE;
pset_unlock(pset);
return (processor->idle_thread);
}
new_thread = steal_thread(pset);
if (new_thread != THREAD_NULL)
return (new_thread);
if (processor->runq.count > 0 || rt_runq.count > 0)
continue;
pset_lock(pset);
if (processor->state == PROCESSOR_RUNNING) {
remqueue(&pset->active_queue, (queue_entry_t)processor);
processor->state = PROCESSOR_IDLE;
if (processor->processor_meta == PROCESSOR_META_NULL || processor->processor_meta->primary == processor) {
enqueue_head(&pset->idle_queue, (queue_entry_t)processor);
pset->low_pri = pset->low_count = processor;
}
else {
enqueue_head(&processor->processor_meta->idle_queue, (queue_entry_t)processor);
if (thread->sched_pri < BASEPRI_RTQUEUES) {
pset_unlock(pset);
return (processor->idle_thread);
}
}
}
pset_unlock(pset);
if ((thread->state & (TH_IDLE|TH_TERMINATE|TH_SUSP)) || !(thread->state & TH_WAIT) || thread->wake_active)
return (processor->idle_thread);
new_thread = thread_select_idle(thread, processor);
} while (new_thread == THREAD_NULL);
return (new_thread);
}
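/*
 *	thread_select_idle:
 *
 *	Idle the processor using the current thread's context rather
 *	than performing a full context switch; returns the thread
 *	chosen once the processor leaves the idle loop.
 */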
static thread_t
thread_select_idle(
thread_t thread,
processor_t processor)
{
thread_t new_thread;
if (thread->sched_mode & TH_MODE_TIMESHARE)
sched_share_decr();
sched_run_decr();
thread->state |= TH_IDLE;
processor->current_pri = IDLEPRI;
thread_unlock(thread);
processor->last_dispatch = mach_absolute_time();
thread_timer_event(processor->last_dispatch, &processor->idle_thread->system_timer);
PROCESSOR_DATA(processor, kernel_timer) = &processor->idle_thread->system_timer;
timer_call_cancel(&processor->quantum_timer);
processor->timeslice = 0;
(*thread->sched_call)(SCHED_CALL_BLOCK, thread);
spllo(); new_thread = processor_idle(thread, processor);
(*thread->sched_call)(SCHED_CALL_UNBLOCK, thread);
thread_lock(thread);
if (!(thread->state & TH_WAIT)) {
processor->last_dispatch = mach_absolute_time();
thread_timer_event(processor->last_dispatch, &thread->system_timer);
PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
thread_quantum_init(thread);
processor->quantum_end = processor->last_dispatch + thread->current_quantum;
timer_call_enter1(&processor->quantum_timer, thread, processor->quantum_end);
processor->timeslice = 1;
thread->computation_epoch = processor->last_dispatch;
}
thread->state &= ~TH_IDLE;
sched_run_incr();
if (thread->sched_mode & TH_MODE_TIMESHARE)
sched_share_incr();
return (new_thread);
}
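/*
 *	choose_thread:
 *
 *	Dequeue the highest-priority thread at or above 'priority'
 *	from the processor's run queue that is not bound to another
 *	processor.  Returns THREAD_NULL if none qualifies.
 */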
static thread_t
choose_thread(
processor_t processor,
int priority)
{
run_queue_t rq = &processor->runq;
queue_t queue = rq->queues + rq->highq;
int pri = rq->highq, count = rq->count;
thread_t thread;
while (count > 0 && pri >= priority) {
thread = (thread_t)queue_first(queue);
while (!queue_end(queue, (queue_entry_t)thread)) {
if (thread->bound_processor == PROCESSOR_NULL ||
thread->bound_processor == processor) {
remqueue(queue, (queue_entry_t)thread);
thread->runq = PROCESSOR_NULL;
rq->count--;
if (testbit(pri, sched_preempt_pri)) {
rq->urgency--; assert(rq->urgency >= 0);
}
if (queue_empty(queue)) {
if (pri != IDLEPRI)
clrbit(MAXPRI - pri, rq->bitmap);
rq->highq = MAXPRI - ffsbit(rq->bitmap);
}
return (thread);
}
count--;
thread = (thread_t)queue_next((queue_entry_t)thread);
}
queue--; pri--;
}
return (THREAD_NULL);
}
#define funnel_release_check(thread, debug) \
MACRO_BEGIN \
if ((thread)->funnel_state & TH_FN_OWNED) { \
(thread)->funnel_state = TH_FN_REFUNNEL; \
KERNEL_DEBUG(0x603242c | DBG_FUNC_NONE, \
(thread)->funnel_lock, (debug), 0, 0, 0); \
funnel_unlock((thread)->funnel_lock); \
} \
MACRO_END
#define funnel_refunnel_check(thread, debug) \
MACRO_BEGIN \
if ((thread)->funnel_state & TH_FN_REFUNNEL) { \
kern_return_t result = (thread)->wait_result; \
\
(thread)->funnel_state = 0; \
KERNEL_DEBUG(0x6032428 | DBG_FUNC_NONE, \
(thread)->funnel_lock, (debug), 0, 0, 0); \
funnel_lock((thread)->funnel_lock); \
KERNEL_DEBUG(0x6032430 | DBG_FUNC_NONE, \
(thread)->funnel_lock, (debug), 0, 0, 0); \
(thread)->funnel_state = TH_FN_OWNED; \
(thread)->wait_result = result; \
} \
MACRO_END
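/*
 *	thread_invoke:
 *
 *	Perform a context switch from 'self' to 'thread', using a
 *	stack handoff when a continuation allows the stack to be
 *	recycled.  Returns FALSE if the switch could not be made
 *	(for example, no kernel stack could be allocated).
 *
 *	Called at splsched.
 */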
static boolean_t
thread_invoke(
register thread_t self,
register thread_t thread,
ast_t reason)
{
thread_continue_t continuation = self->continuation;
void *parameter = self->parameter;
processor_t processor;
if (get_preemption_level() != 0) {
int pl = get_preemption_level();
panic("thread_invoke: preemption_level %d, possible cause: %s",
pl, (pl < 0 ? "unlocking an unlocked mutex or spinlock" :
"blocking while holding a spinlock, or within interrupt context"));
}
assert(self == current_thread());
thread_lock(thread);
thread->state &= ~TH_UNINT;
#if DEBUG
assert(thread_runnable(thread));
#endif
if ((self->sched_mode & TH_MODE_REALTIME) && !self->reserved_stack)
self->reserved_stack = self->kernel_stack;
if (continuation != NULL) {
if (!thread->kernel_stack) {
if (self->kernel_stack == self->reserved_stack && !thread->reserved_stack)
goto need_stack;
continuation = thread->continuation;
parameter = thread->parameter;
processor = current_processor();
processor->active_thread = thread;
processor->current_pri = thread->sched_pri;
if (thread->last_processor != processor && thread->last_processor != NULL) {
if (thread->last_processor->processor_set != processor->processor_set)
thread->ps_switch++;
thread->p_switch++;
}
thread->last_processor = processor;
thread->c_switch++;
ast_context(thread);
thread_unlock(thread);
self->reason = reason;
processor->last_dispatch = mach_absolute_time();
thread_timer_event(processor->last_dispatch, &thread->system_timer);
PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_HANDOFF)|DBG_FUNC_NONE,
self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
DTRACE_SCHED2(off__cpu, struct thread *, thread, struct proc *, thread->task->bsd_info);
TLOG(1, "thread_invoke: calling machine_stack_handoff\n");
machine_stack_handoff(self, thread);
DTRACE_SCHED(on__cpu);
thread_dispatch(self, thread);
thread->continuation = thread->parameter = NULL;
counter(c_thread_invoke_hits++);
funnel_refunnel_check(thread, 2);
(void) spllo();
assert(continuation);
call_continuation(continuation, parameter, thread->wait_result);
}
else if (thread == self) {
ast_context(self);
counter(++c_thread_invoke_same);
thread_unlock(self);
self->continuation = self->parameter = NULL;
funnel_refunnel_check(self, 3);
(void) spllo();
call_continuation(continuation, parameter, self->wait_result);
}
}
else {
if (!thread->kernel_stack) {
need_stack:
if (!stack_alloc_try(thread)) {
counter(c_thread_invoke_misses++);
thread_unlock(thread);
thread_stack_enqueue(thread);
return (FALSE);
}
}
else if (thread == self) {
ast_context(self);
counter(++c_thread_invoke_same);
thread_unlock(self);
return (TRUE);
}
}
processor = current_processor();
processor->active_thread = thread;
processor->current_pri = thread->sched_pri;
if (thread->last_processor != processor && thread->last_processor != NULL) {
if (thread->last_processor->processor_set != processor->processor_set)
thread->ps_switch++;
thread->p_switch++;
}
thread->last_processor = processor;
thread->c_switch++;
ast_context(thread);
thread_unlock(thread);
counter(c_thread_invoke_csw++);
assert(self->runq == PROCESSOR_NULL);
self->reason = reason;
processor->last_dispatch = mach_absolute_time();
thread_timer_event(processor->last_dispatch, &thread->system_timer);
PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE,
self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
DTRACE_SCHED2(off__cpu, struct thread *, thread, struct proc *, thread->task->bsd_info);
thread = machine_switch_context(self, continuation, thread);
TLOG(1,"thread_invoke: returning machine_switch_context: self %p continuation %p thread %p\n", self, continuation, thread);
DTRACE_SCHED(on__cpu);
thread_dispatch(thread, self);
if (continuation) {
self->continuation = self->parameter = NULL;
funnel_refunnel_check(self, 3);
(void) spllo();
call_continuation(continuation, parameter, self->wait_result);
}
return (TRUE);
}
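/*
 *	thread_dispatch:
 *
 *	Handle threads at context switch time: dispose of the old
 *	thread (requeue it, leave it waiting, or hand it to the
 *	terminate queue) and set up quantum accounting for the new one.
 *
 *	Called at splsched.
 */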
void
thread_dispatch(
thread_t thread,
thread_t self)
{
processor_t processor = self->last_processor;
if (thread != THREAD_NULL) {
if (thread->continuation != NULL && thread->kernel_stack != 0)
stack_free(thread);
if (!(thread->state & TH_IDLE)) {
wake_lock(thread);
thread_lock(thread);
if ( first_timeslice(processor) &&
processor->quantum_end > processor->last_dispatch )
thread->current_quantum = (uint32_t)(processor->quantum_end - processor->last_dispatch);
else
thread->current_quantum = 0;
if (thread->sched_mode & TH_MODE_REALTIME) {
if (thread->current_quantum == 0) {
thread->realtime.deadline = UINT64_MAX;
thread->reason |= AST_QUANTUM;
}
}
else {
if (thread->current_quantum < min_std_quantum) {
thread->reason |= AST_QUANTUM;
thread->current_quantum += std_quantum;
}
}
if ((thread->reason & (AST_HANDOFF|AST_QUANTUM)) == AST_HANDOFF) {
self->current_quantum = thread->current_quantum;
thread->reason |= AST_QUANTUM;
thread->current_quantum = 0;
}
thread->computation_metered += (processor->last_dispatch - thread->computation_epoch);
if (!(thread->state & TH_WAIT)) {
if (thread->reason & AST_QUANTUM)
thread_setrun(thread, SCHED_TAILQ);
else
if (thread->reason & AST_PREEMPT)
thread_setrun(thread, SCHED_HEADQ);
else
thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
thread->reason = AST_NONE;
thread_unlock(thread);
wake_unlock(thread);
}
else {
thread->state &= ~TH_RUN;
if (thread->sched_mode & TH_MODE_TIMESHARE)
sched_share_decr();
sched_run_decr();
if (thread->wake_active) {
thread->wake_active = FALSE;
thread_unlock(thread);
thread_wakeup(&thread->wake_active);
}
else
thread_unlock(thread);
wake_unlock(thread);
(*thread->sched_call)(SCHED_CALL_BLOCK, thread);
if (thread->state & TH_TERMINATE)
thread_terminate_enqueue(thread);
}
}
}
if (!(self->state & TH_IDLE)) {
if (self->current_quantum == 0)
thread_quantum_init(self);
processor->quantum_end = (processor->last_dispatch + self->current_quantum);
timer_call_enter1(&processor->quantum_timer, self, processor->quantum_end);
processor->timeslice = 1;
self->computation_epoch = processor->last_dispatch;
}
else {
timer_call_cancel(&processor->quantum_timer);
processor->timeslice = 0;
}
}
#include <libkern/OSDebug.h>
uint32_t kdebug_thread_block = 0;
counter(mach_counter_t c_thread_block_calls = 0;)
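/*
 *	thread_block_reason:
 *
 *	Block the current thread for the stated reason, selecting a new
 *	thread to run and optionally arranging for 'continuation' to be
 *	called when this thread resumes.  Returns the wait result.
 */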
wait_result_t
thread_block_reason(
thread_continue_t continuation,
void *parameter,
ast_t reason)
{
register thread_t self = current_thread();
register processor_t processor;
register thread_t new_thread;
spl_t s;
counter(++c_thread_block_calls);
s = splsched();
if (!(reason & AST_PREEMPT))
funnel_release_check(self, 2);
processor = current_processor();
if (reason & AST_YIELD)
processor->timeslice = 0;
ast_off(AST_SCHEDULING);
self->continuation = continuation;
self->parameter = parameter;
if (kdebug_thread_block && kdebug_enable && self->state != TH_RUN) {
uint32_t bt[8];
OSBacktrace((void **)&bt[0], 8);
KERNEL_DEBUG_CONSTANT(0x140004c | DBG_FUNC_START, bt[0], bt[1], bt[2], bt[3], 0);
KERNEL_DEBUG_CONSTANT(0x140004c | DBG_FUNC_END, bt[4], bt[5], bt[6], bt[7], 0);
}
do {
thread_lock(self);
new_thread = thread_select(self, processor);
thread_unlock(self);
} while (!thread_invoke(self, new_thread, reason));
funnel_refunnel_check(self, 5);
splx(s);
return (self->wait_result);
}
wait_result_t
thread_block(
thread_continue_t continuation)
{
return thread_block_reason(continuation, NULL, AST_NONE);
}
wait_result_t
thread_block_parameter(
thread_continue_t continuation,
void *parameter)
{
return thread_block_reason(continuation, parameter, AST_NONE);
}
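/*
 *	thread_run:
 *
 *	Switch directly from the current thread to the specified new
 *	thread, handing off the remainder of the current quantum;
 *	falls back to normal selection if the handoff fails.
 *
 *	Called at splsched.
 */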
int
thread_run(
thread_t self,
thread_continue_t continuation,
void *parameter,
thread_t new_thread)
{
ast_t handoff = AST_HANDOFF;
funnel_release_check(self, 3);
self->continuation = continuation;
self->parameter = parameter;
while (!thread_invoke(self, new_thread, handoff)) {
processor_t processor = current_processor();
thread_lock(self);
new_thread = thread_select(self, processor);
thread_unlock(self);
handoff = AST_NONE;
}
funnel_refunnel_check(self, 6);
return (self->wait_result);
}
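/*
 *	thread_continue:
 *
 *	Called at splsched when a thread first begins execution on a
 *	fresh stack; finishes dispatching the old thread and invokes
 *	the stored continuation.
 */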
void
thread_continue(
register thread_t thread)
{
register thread_t self = current_thread();
register thread_continue_t continuation;
register void *parameter;
DTRACE_SCHED(on__cpu);
continuation = self->continuation;
parameter = self->parameter;
thread_dispatch(thread, self);
self->continuation = self->parameter = NULL;
funnel_refunnel_check(self, 4);
if (thread != THREAD_NULL)
(void)spllo();
TLOG(1, "thread_continue: calling call_continuation \n");
call_continuation(continuation, parameter, self->wait_result);
}
void
run_queue_init(
run_queue_t rq)
{
int i;
rq->highq = IDLEPRI;
for (i = 0; i < NRQBM; i++)
rq->bitmap[i] = 0;
setbit(MAXPRI - IDLEPRI, rq->bitmap);
rq->urgency = rq->count = 0;
for (i = 0; i < NRQS; i++)
queue_init(&rq->queues[i]);
}
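/*
 *	run_queue_dequeue:
 *
 *	Remove and return a thread from the highest-priority non-empty
 *	queue, from the head or tail per 'options', updating the count,
 *	urgency, bitmap, and highq hint.
 *
 *	The run queue must be locked.
 */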
static thread_t
run_queue_dequeue(
run_queue_t rq,
integer_t options)
{
thread_t thread;
queue_t queue = rq->queues + rq->highq;
if (options & SCHED_HEADQ) {
thread = (thread_t)queue->next;
((queue_entry_t)thread)->next->prev = queue;
queue->next = ((queue_entry_t)thread)->next;
}
else {
thread = (thread_t)queue->prev;
((queue_entry_t)thread)->prev->next = queue;
queue->prev = ((queue_entry_t)thread)->prev;
}
thread->runq = PROCESSOR_NULL;
rq->count--;
if (testbit(rq->highq, sched_preempt_pri)) {
rq->urgency--; assert(rq->urgency >= 0);
}
if (queue_empty(queue)) {
if (rq->highq != IDLEPRI)
clrbit(MAXPRI - rq->highq, rq->bitmap);
rq->highq = MAXPRI - ffsbit(rq->bitmap);
}
return (thread);
}
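/*
 *	realtime_queue_insert:
 *
 *	Enqueue a real-time thread on the global real-time run queue,
 *	kept sorted by deadline within each priority level.  Returns
 *	TRUE if the thread lands at the front and should preempt.
 */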
static boolean_t
realtime_queue_insert(
thread_t thread)
{
run_queue_t rq = &rt_runq;
queue_t queue = rq->queues + thread->sched_pri;
uint64_t deadline = thread->realtime.deadline;
boolean_t preempt = FALSE;
simple_lock(&rt_lock);
if (queue_empty(queue)) {
enqueue_tail(queue, (queue_entry_t)thread);
setbit(MAXPRI - thread->sched_pri, rq->bitmap);
if (thread->sched_pri > rq->highq)
rq->highq = thread->sched_pri;
preempt = TRUE;
}
else {
register thread_t entry = (thread_t)queue_first(queue);
while (TRUE) {
if ( queue_end(queue, (queue_entry_t)entry) ||
deadline < entry->realtime.deadline ) {
entry = (thread_t)queue_prev((queue_entry_t)entry);
break;
}
entry = (thread_t)queue_next((queue_entry_t)entry);
}
if ((queue_entry_t)entry == queue)
preempt = TRUE;
insque((queue_entry_t)thread, (queue_entry_t)entry);
}
thread->runq = RT_RUNQ;
rq->count++; rq->urgency++;
simple_unlock(&rt_lock);
return (preempt);
}
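/*
 *	realtime_setrun:
 *
 *	Dispatch a real-time thread: hand it directly to an idle
 *	processor, otherwise insert it into the real-time run queue
 *	and request a preemption check.
 *
 *	Processor set locked on entry, unlocked on return.
 */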
static void
realtime_setrun(
processor_t processor,
thread_t thread)
{
processor_set_t pset = processor->processor_set;
if (processor->state == PROCESSOR_IDLE) {
remqueue(&pset->idle_queue, (queue_entry_t)processor);
enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
processor->next_thread = thread;
processor->deadline = thread->realtime.deadline;
processor->state = PROCESSOR_DISPATCHING;
pset_unlock(pset);
if (processor != current_processor())
machine_signal_idle(processor);
return;
}
if (realtime_queue_insert(thread)) {
if (processor == current_processor())
ast_on(AST_PREEMPT | AST_URGENT);
else
cause_ast_check(processor);
}
pset_unlock(pset);
}
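/*
 *	processor_enqueue:
 *
 *	Enqueue the thread on the processor run queue.  Returns TRUE
 *	if the enqueue raises the queue's highest priority, as a
 *	preemption hint.
 *
 *	The associated processor set must be locked.
 */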
static boolean_t
processor_enqueue(
processor_t processor,
thread_t thread,
integer_t options)
{
run_queue_t rq = &processor->runq;
queue_t queue = rq->queues + thread->sched_pri;
boolean_t result = FALSE;
if (queue_empty(queue)) {
enqueue_tail(queue, (queue_entry_t)thread);
setbit(MAXPRI - thread->sched_pri, rq->bitmap);
if (thread->sched_pri > rq->highq) {
rq->highq = thread->sched_pri;
result = TRUE;
}
}
else
if (options & SCHED_TAILQ)
enqueue_tail(queue, (queue_entry_t)thread);
else
enqueue_head(queue, (queue_entry_t)thread);
thread->runq = processor;
if (testbit(thread->sched_pri, sched_preempt_pri))
rq->urgency++;
rq->count++;
return (result);
}
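/*
 *	processor_setrun:
 *
 *	Dispatch a thread onto a specific processor: hand it directly
 *	to an idle processor, otherwise enqueue it and post a
 *	preemption AST when warranted.
 *
 *	Processor set locked on entry, unlocked on return.
 */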
static void
processor_setrun(
processor_t processor,
thread_t thread,
integer_t options)
{
processor_set_t pset = processor->processor_set;
ast_t preempt;
if (processor->state == PROCESSOR_IDLE) {
remqueue(&pset->idle_queue, (queue_entry_t)processor);
enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
processor->next_thread = thread;
processor->deadline = UINT64_MAX;
processor->state = PROCESSOR_DISPATCHING;
pset_unlock(pset);
if (processor != current_processor())
machine_signal_idle(processor);
return;
}
if (testbit(thread->sched_pri, sched_preempt_pri))
preempt = (AST_PREEMPT | AST_URGENT);
else
if (thread->sched_mode & TH_MODE_TIMESHARE && thread->sched_pri < thread->priority)
preempt = AST_NONE;
else
preempt = (options & SCHED_PREEMPT)? AST_PREEMPT: AST_NONE;
if (!processor_enqueue(processor, thread, options))
preempt = AST_NONE;
if (preempt != AST_NONE) {
if (processor == current_processor()) {
if (csw_check(processor) != AST_NONE)
ast_on(preempt);
}
else
if ( (processor->state == PROCESSOR_RUNNING ||
processor->state == PROCESSOR_SHUTDOWN) &&
thread->sched_pri >= processor->current_pri ) {
cause_ast_check(processor);
}
}
else
if ( processor->state == PROCESSOR_SHUTDOWN &&
thread->sched_pri >= processor->current_pri ) {
cause_ast_check(processor);
}
pset_unlock(pset);
}
#define next_pset(p) (((p)->pset_list != PROCESSOR_SET_NULL)? (p)->pset_list: (p)->node->psets)
static processor_set_t
choose_next_pset(
processor_set_t pset)
{
processor_set_t nset = pset;
do {
nset = next_pset(nset);
} while (nset->processor_count < 1 && nset != pset);
return (nset);
}
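/*
 *	choose_processor:
 *
 *	Choose a processor for the thread, beginning at the supplied
 *	processor set: prefer an idle processor, then (for real-time
 *	threads) one that can be preempted, then the lowest-priority
 *	or least-loaded candidate, falling back to the master processor.
 *
 *	Called with the pset locked; returns with the chosen
 *	processor's pset locked.
 */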
static processor_t
choose_processor(
processor_set_t pset,
thread_t thread)
{
processor_set_t nset, cset = pset;
processor_t processor = thread->last_processor;
processor_meta_t pmeta = PROCESSOR_META_NULL;
if (processor != PROCESSOR_NULL) {
if (processor->processor_meta != PROCESSOR_META_NULL)
processor = processor->processor_meta->primary;
if (processor->processor_set != pset || processor->state == PROCESSOR_INACTIVE ||
processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE)
processor = PROCESSOR_NULL;
else
if (processor->state == PROCESSOR_IDLE)
return (processor);
}
do {
if (!queue_empty(&cset->idle_queue))
return ((processor_t)queue_first(&cset->idle_queue));
if (thread->sched_pri >= BASEPRI_RTQUEUES) {
processor = (processor_t)queue_first(&cset->active_queue);
while (!queue_end(&cset->active_queue, (queue_entry_t)processor)) {
if (thread->sched_pri > processor->current_pri ||
thread->realtime.deadline < processor->deadline)
return (processor);
if (pmeta == PROCESSOR_META_NULL) {
if (processor->processor_meta != PROCESSOR_META_NULL &&
!queue_empty(&processor->processor_meta->idle_queue))
pmeta = processor->processor_meta;
}
processor = (processor_t)queue_next((queue_entry_t)processor);
}
if (pmeta != PROCESSOR_META_NULL)
return ((processor_t)queue_first(&pmeta->idle_queue));
processor = PROCESSOR_NULL;
}
else {
if (cset->low_pri != PROCESSOR_NULL && cset->low_pri->state != PROCESSOR_INACTIVE &&
cset->low_pri->state != PROCESSOR_SHUTDOWN && cset->low_pri->state != PROCESSOR_OFF_LINE &&
(processor == PROCESSOR_NULL ||
(thread->sched_pri > BASEPRI_DEFAULT && cset->low_pri->current_pri < thread->sched_pri))) {
processor = cset->low_pri;
}
else
if (cset->low_count != PROCESSOR_NULL && cset->low_count->state != PROCESSOR_INACTIVE &&
cset->low_count->state != PROCESSOR_SHUTDOWN && cset->low_count->state != PROCESSOR_OFF_LINE &&
(processor == PROCESSOR_NULL || (thread->sched_pri <= BASEPRI_DEFAULT &&
cset->low_count->runq.count < processor->runq.count))) {
processor = cset->low_count;
}
if (processor == PROCESSOR_NULL) {
processor = (processor_t)dequeue_head(&cset->active_queue);
if (processor != PROCESSOR_NULL)
enqueue_tail(&cset->active_queue, (queue_entry_t)processor);
}
if (processor != PROCESSOR_NULL && pmeta == PROCESSOR_META_NULL) {
if (processor->processor_meta != PROCESSOR_META_NULL &&
!queue_empty(&processor->processor_meta->idle_queue))
pmeta = processor->processor_meta;
}
}
nset = next_pset(cset);
if (nset != pset) {
pset_unlock(cset);
cset = nset;
pset_lock(cset);
}
} while (nset != pset);
do {
if (pmeta != PROCESSOR_META_NULL) {
if (cset != pmeta->primary->processor_set) {
pset_unlock(cset);
cset = pmeta->primary->processor_set;
pset_lock(cset);
}
if (!queue_empty(&pmeta->idle_queue))
return ((processor_t)queue_first(&pmeta->idle_queue));
pmeta = PROCESSOR_META_NULL;
}
if (processor == PROCESSOR_NULL) {
processor = master_processor;
if (cset != processor->processor_set) {
pset_unlock(cset);
cset = processor->processor_set;
pset_lock(cset);
}
return (processor);
}
if (cset != processor->processor_set) {
pset_unlock(cset);
cset = processor->processor_set;
pset_lock(cset);
}
if (processor->state == PROCESSOR_INACTIVE ||
processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE)
processor = PROCESSOR_NULL;
} while (processor == PROCESSOR_NULL);
return (processor);
}
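/*
 *	thread_setrun:
 *
 *	Dispatch a thread for execution on an appropriate processor,
 *	honoring bound-processor and affinity-set constraints.
 *
 *	Thread must be locked.
 */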
void
thread_setrun(
thread_t thread,
integer_t options)
{
processor_t processor;
processor_set_t pset;
#if DEBUG
assert(thread_runnable(thread));
#endif
if (thread->sched_stamp != sched_tick)
update_priority(thread);
assert(thread->runq == PROCESSOR_NULL);
if (thread->bound_processor == PROCESSOR_NULL) {
if (thread->affinity_set != AFFINITY_SET_NULL) {
pset = thread->affinity_set->aset_pset;
pset_lock(pset);
processor = choose_processor(pset, thread);
}
else
if (thread->last_processor != PROCESSOR_NULL) {
processor = thread->last_processor;
pset = processor->processor_set;
pset_lock(pset);
if (thread->sched_pri >= BASEPRI_RTQUEUES) {
if (thread->sched_pri <= processor->current_pri ||
thread->realtime.deadline >= processor->deadline)
processor = choose_processor(pset, thread);
}
else
processor = choose_processor(pset, thread);
}
else {
task_t task = thread->task;
pset = task->pset_hint;
if (pset == PROCESSOR_SET_NULL)
pset = current_processor()->processor_set;
pset = choose_next_pset(pset);
pset_lock(pset);
processor = choose_processor(pset, thread);
task->pset_hint = processor->processor_set;
}
}
else {
processor = thread->bound_processor;
pset = processor->processor_set;
pset_lock(pset);
}
if (thread->sched_pri >= BASEPRI_RTQUEUES)
realtime_setrun(processor, thread);
else
processor_setrun(processor, thread, options);
}
processor_set_t
task_choose_pset(
task_t task)
{
processor_set_t pset = task->pset_hint;
if (pset != PROCESSOR_SET_NULL)
pset = choose_next_pset(pset);
return (pset);
}
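/*
 *	processor_queue_shutdown:
 *
 *	Drain the run queue of a processor that is shutting down,
 *	redispatching unbound threads elsewhere.
 *
 *	Associated pset locked on entry, unlocked on return.
 */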
void
processor_queue_shutdown(
processor_t processor)
{
processor_set_t pset = processor->processor_set;
run_queue_t rq = &processor->runq;
queue_t queue = rq->queues + rq->highq;
int pri = rq->highq, count = rq->count;
thread_t next, thread;
queue_head_t tqueue;
queue_init(&tqueue);
while (count > 0) {
thread = (thread_t)queue_first(queue);
while (!queue_end(queue, (queue_entry_t)thread)) {
next = (thread_t)queue_next((queue_entry_t)thread);
if (thread->bound_processor == PROCESSOR_NULL) {
remqueue(queue, (queue_entry_t)thread);
thread->runq = PROCESSOR_NULL;
rq->count--;
if (testbit(pri, sched_preempt_pri)) {
rq->urgency--; assert(rq->urgency >= 0);
}
if (queue_empty(queue)) {
if (pri != IDLEPRI)
clrbit(MAXPRI - pri, rq->bitmap);
rq->highq = MAXPRI - ffsbit(rq->bitmap);
}
enqueue_tail(&tqueue, (queue_entry_t)thread);
}
count--;
thread = next;
}
queue--; pri--;
}
pset_unlock(pset);
while ((thread = (thread_t)dequeue_head(&tqueue)) != THREAD_NULL) {
thread_lock(thread);
thread_setrun(thread, SCHED_TAILQ);
thread_unlock(thread);
}
}
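/*
 *	csw_check:
 *
 *	Check for a preemption point in the current context,
 *	returning the AST(s) that should be taken, if any.
 *
 *	Called at splsched.
 */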
ast_t
csw_check(
processor_t processor)
{
ast_t result = AST_NONE;
run_queue_t runq;
if (first_timeslice(processor)) {
runq = &rt_runq;
if (runq->highq >= BASEPRI_RTQUEUES)
return (AST_PREEMPT | AST_URGENT);
if (runq->highq > processor->current_pri) {
if (runq->urgency > 0)
return (AST_PREEMPT | AST_URGENT);
result |= AST_PREEMPT;
}
runq = &processor->runq;
if (runq->highq > processor->current_pri) {
if (runq->urgency > 0)
return (AST_PREEMPT | AST_URGENT);
result |= AST_PREEMPT;
}
}
else {
runq = &rt_runq;
if (runq->highq >= processor->current_pri) {
if (runq->urgency > 0)
return (AST_PREEMPT | AST_URGENT);
result |= AST_PREEMPT;
}
runq = &processor->runq;
if (runq->highq >= processor->current_pri) {
if (runq->urgency > 0)
return (AST_PREEMPT | AST_URGENT);
result |= AST_PREEMPT;
}
}
if (result != AST_NONE)
return (result);
if (processor->current_pri < BASEPRI_RTQUEUES && processor->processor_meta != PROCESSOR_META_NULL &&
processor->processor_meta->primary != processor)
return (AST_PREEMPT);
if (machine_cpu_is_inactive(processor->cpu_id))
return (AST_PREEMPT);
if (processor->active_thread->state & TH_SUSP)
return (AST_PREEMPT);
return (AST_NONE);
}
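/*
 *	set_sched_pri:
 *
 *	Set the scheduled priority of the specified thread, removing
 *	and re-adding it to a run queue as appropriate and issuing
 *	preemption checks.
 *
 *	Thread must be locked.
 */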
void
set_sched_pri(
thread_t thread,
int priority)
{
boolean_t removed = run_queue_remove(thread);
thread->sched_pri = priority;
if (removed)
thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
else
if (thread->state & TH_RUN) {
processor_t processor = thread->last_processor;
if (thread == current_thread()) {
ast_t preempt;
processor->current_pri = priority;
if ((preempt = csw_check(processor)) != AST_NONE)
ast_on(preempt);
}
else
if ( processor != PROCESSOR_NULL &&
processor->active_thread == thread )
cause_ast_check(processor);
}
}
#if 0
static void
run_queue_check(
run_queue_t rq,
thread_t thread)
{
queue_t q;
queue_entry_t qe;
if (rq != thread->runq)
panic("run_queue_check: thread runq");
if (thread->sched_pri > MAXPRI || thread->sched_pri < MINPRI)
panic("run_queue_check: thread sched_pri");
q = &rq->queues[thread->sched_pri];
qe = queue_first(q);
while (!queue_end(q, qe)) {
if (qe == (queue_entry_t)thread)
return;
qe = queue_next(qe);
}
panic("run_queue_check: end");
}
#endif
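/*
 *	run_queue_remove:
 *
 *	Remove the thread from whichever run queue it occupies, if any.
 *	Returns TRUE if the thread was on a run queue.
 *
 *	Thread must be locked.
 */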
boolean_t
run_queue_remove(
thread_t thread)
{
processor_t processor = thread->runq;
if (processor != PROCESSOR_NULL) {
void * rqlock;
run_queue_t rq;
if (thread->sched_pri < BASEPRI_RTQUEUES) {
rqlock = &processor->processor_set->sched_lock;
rq = &processor->runq;
}
else {
rqlock = &rt_lock; rq = &rt_runq;
}
simple_lock(rqlock);
if (processor == thread->runq) {
remqueue(&rq->queues[0], (queue_entry_t)thread);
rq->count--;
if (testbit(thread->sched_pri, sched_preempt_pri)) {
rq->urgency--; assert(rq->urgency >= 0);
}
if (queue_empty(rq->queues + thread->sched_pri)) {
if (thread->sched_pri != IDLEPRI)
clrbit(MAXPRI - thread->sched_pri, rq->bitmap);
rq->highq = MAXPRI - ffsbit(rq->bitmap);
}
thread->runq = PROCESSOR_NULL;
}
else {
assert(thread->runq == PROCESSOR_NULL);
processor = PROCESSOR_NULL;
}
simple_unlock(rqlock);
}
return (processor != PROCESSOR_NULL);
}
static thread_t
steal_processor_thread(
processor_t processor)
{
run_queue_t rq = &processor->runq;
queue_t queue = rq->queues + rq->highq;
int pri = rq->highq, count = rq->count;
thread_t thread;
while (count > 0) {
thread = (thread_t)queue_first(queue);
while (!queue_end(queue, (queue_entry_t)thread)) {
if (thread->bound_processor == PROCESSOR_NULL) {
remqueue(queue, (queue_entry_t)thread);
thread->runq = PROCESSOR_NULL;
rq->count--;
if (testbit(pri, sched_preempt_pri)) {
rq->urgency--; assert(rq->urgency >= 0);
}
if (queue_empty(queue)) {
if (pri != IDLEPRI)
clrbit(MAXPRI - pri, rq->bitmap);
rq->highq = MAXPRI - ffsbit(rq->bitmap);
}
return (thread);
}
count--;
thread = (thread_t)queue_next((queue_entry_t)thread);
}
queue--; pri--;
}
return (THREAD_NULL);
}
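/*
 *	steal_thread:
 *
 *	Attempt to steal a runnable, unbound thread from another
 *	processor in the set (or a neighboring set).  Returns
 *	THREAD_NULL on failure.
 *
 *	The pset is locked on entry and unlocked before return.
 */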
static thread_t
steal_thread(
processor_set_t pset)
{
processor_set_t nset, cset = pset;
processor_t processor;
thread_t thread;
do {
processor = (processor_t)queue_first(&cset->active_queue);
while (!queue_end(&cset->active_queue, (queue_entry_t)processor)) {
if (processor->runq.count > 0) {
thread = steal_processor_thread(processor);
if (thread != THREAD_NULL) {
remqueue(&cset->active_queue, (queue_entry_t)processor);
enqueue_tail(&cset->active_queue, (queue_entry_t)processor);
pset_unlock(cset);
return (thread);
}
}
processor = (processor_t)queue_next((queue_entry_t)processor);
}
nset = next_pset(cset);
if (nset != pset) {
pset_unlock(cset);
cset = nset;
pset_lock(cset);
}
} while (nset != pset);
pset_unlock(cset);
return (THREAD_NULL);
}
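/*
 *	processor_idle:
 *
 *	Processor idle loop: wait for work at splsched, switching the
 *	processor's timer accounting between system and idle state,
 *	and return the next thread to run (or THREAD_NULL to reenter
 *	thread_select).
 */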
static thread_t
processor_idle(
thread_t thread,
processor_t processor)
{
processor_set_t pset = processor->processor_set;
thread_t new_thread;
int state;
(void)splsched();
KERNEL_DEBUG_CONSTANT(
MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_START, (uintptr_t)thread_tid(thread), 0, 0, 0, 0);
timer_switch(&PROCESSOR_DATA(processor, system_state),
mach_absolute_time(), &PROCESSOR_DATA(processor, idle_state));
PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, idle_state);
while (processor->next_thread == THREAD_NULL && processor->runq.count == 0 && rt_runq.count == 0 &&
(thread == THREAD_NULL || ((thread->state & (TH_WAIT|TH_SUSP)) == TH_WAIT && !thread->wake_active))) {
machine_idle();
(void)splsched();
if (processor->state == PROCESSOR_INACTIVE && !machine_cpu_is_inactive(processor->cpu_id))
break;
}
timer_switch(&PROCESSOR_DATA(processor, idle_state),
mach_absolute_time(), &PROCESSOR_DATA(processor, system_state));
PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, system_state);
pset_lock(pset);
state = processor->state;
if (state == PROCESSOR_DISPATCHING) {
new_thread = processor->next_thread;
processor->next_thread = THREAD_NULL;
processor->state = PROCESSOR_RUNNING;
if ( processor->runq.highq > new_thread->sched_pri ||
(rt_runq.highq > 0 && rt_runq.highq >= new_thread->sched_pri) ) {
processor->deadline = UINT64_MAX;
pset_unlock(pset);
thread_lock(new_thread);
thread_setrun(new_thread, SCHED_HEADQ);
thread_unlock(new_thread);
KERNEL_DEBUG_CONSTANT(
MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (uintptr_t)thread_tid(thread), state, 0, 0, 0);
return (THREAD_NULL);
}
pset_unlock(pset);
KERNEL_DEBUG_CONSTANT(
MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (uintptr_t)thread_tid(thread), state, (uintptr_t)thread_tid(new_thread), 0, 0);
return (new_thread);
}
else
if (state == PROCESSOR_IDLE) {
remqueue(&pset->idle_queue, (queue_entry_t)processor);
processor->state = PROCESSOR_RUNNING;
enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
}
else
if (state == PROCESSOR_INACTIVE) {
processor->state = PROCESSOR_RUNNING;
enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
}
else
if (state == PROCESSOR_SHUTDOWN) {
if ((new_thread = processor->next_thread) != THREAD_NULL) {
processor->next_thread = THREAD_NULL;
processor->deadline = UINT64_MAX;
pset_unlock(pset);
thread_lock(new_thread);
thread_setrun(new_thread, SCHED_HEADQ);
thread_unlock(new_thread);
KERNEL_DEBUG_CONSTANT(
MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (uintptr_t)thread_tid(thread), state, 0, 0, 0);
return (THREAD_NULL);
}
}
pset_unlock(pset);
KERNEL_DEBUG_CONSTANT(
MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (uintptr_t)thread_tid(thread), state, 0, 0, 0);
return (THREAD_NULL);
}
void
idle_thread(void)
{
processor_t processor = current_processor();
thread_t new_thread;
new_thread = processor_idle(THREAD_NULL, processor);
if (new_thread != THREAD_NULL) {
thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread);
}
thread_block((thread_continue_t)idle_thread);
}
kern_return_t
idle_thread_create(
processor_t processor)
{
kern_return_t result;
thread_t thread;
spl_t s;
result = kernel_thread_create((thread_continue_t)idle_thread, NULL, MAXPRI_KERNEL, &thread);
if (result != KERN_SUCCESS)
return (result);
s = splsched();
thread_lock(thread);
thread->bound_processor = processor;
processor->idle_thread = thread;
thread->sched_pri = thread->priority = IDLEPRI;
thread->state = (TH_RUN | TH_IDLE);
thread_unlock(thread);
splx(s);
thread_deallocate(thread);
return (KERN_SUCCESS);
}
static uint64_t sched_tick_deadline;
void
sched_startup(void)
{
kern_return_t result;
thread_t thread;
result = kernel_thread_start_priority((thread_continue_t)sched_tick_thread, NULL, MAXPRI_KERNEL, &thread);
if (result != KERN_SUCCESS)
panic("sched_startup");
thread_deallocate(thread);
while (sched_cswtime == 0)
thread_block(THREAD_CONTINUE_NULL);
thread_daemon_init();
thread_call_initialize();
}
static void
sched_tick_continue(void)
{
uint64_t abstime = mach_absolute_time();
sched_tick++;
compute_averages();
thread_update_scan();
clock_deadline_for_periodic_event(sched_tick_interval, abstime,
&sched_tick_deadline);
assert_wait_deadline((event_t)sched_tick_thread, THREAD_UNINT, sched_tick_deadline);
thread_block((thread_continue_t)sched_tick_continue);
}
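/*
 *	time_cswitch:
 *
 *	Estimate the context switch time by timing several
 *	thread_block() round trips, discarding the fastest and slowest
 *	samples, and halving the average (each sample covers a switch
 *	out and back).
 */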
static uint32_t
time_cswitch(void)
{
uint32_t new, hi, low, accum;
uint64_t abstime;
int i, tries = 7;
accum = hi = low = 0;
for (i = 0; i < tries; ++i) {
abstime = mach_absolute_time();
thread_block(THREAD_CONTINUE_NULL);
new = (uint32_t)(mach_absolute_time() - abstime);
if (i == 0)
accum = hi = low = new;
else {
if (new < low)
low = new;
else
if (new > hi)
hi = new;
accum += new;
}
}
return ((accum - hi - low) / (2 * (tries - 2)));
}
void
sched_tick_thread(void)
{
sched_cswtime = time_cswitch();
sched_tick_deadline = mach_absolute_time();
sched_tick_continue();
}
#define THREAD_UPDATE_SIZE 128
static thread_t thread_update_array[THREAD_UPDATE_SIZE];
static int thread_update_count = 0;
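/*
 *	runq_scan:
 *
 *	Scan a run queue for stale timesharing threads whose priorities
 *	need recomputation, collecting them into thread_update_array.
 *	Returns TRUE if the array filled and the scan must restart.
 */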
static boolean_t
runq_scan(
run_queue_t runq)
{
register int count;
register queue_t q;
register thread_t thread;
if ((count = runq->count) > 0) {
q = runq->queues + runq->highq;
while (count > 0) {
queue_iterate(q, thread, thread_t, links) {
if ( thread->sched_stamp != sched_tick &&
(thread->sched_mode & TH_MODE_TIMESHARE) ) {
if (thread_update_count == THREAD_UPDATE_SIZE)
return (TRUE);
thread_update_array[thread_update_count++] = thread;
thread_reference_internal(thread);
}
count--;
}
q--;
}
}
return (FALSE);
}
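/*
 *	thread_update_scan:
 *
 *	Periodic pass, driven by the scheduler tick, over processor
 *	run queues and idle threads, recomputing the priority of
 *	threads that have not run recently.
 */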
static void
thread_update_scan(void)
{
boolean_t restart_needed = FALSE;
processor_t processor = processor_list;
processor_set_t pset;
thread_t thread;
spl_t s;
do {
do {
pset = processor->processor_set;
s = splsched();
pset_lock(pset);
restart_needed = runq_scan(&processor->runq);
pset_unlock(pset);
splx(s);
if (restart_needed)
break;
thread = processor->idle_thread;
if (thread != THREAD_NULL && thread->sched_stamp != sched_tick) {
if (thread_update_count == THREAD_UPDATE_SIZE) {
restart_needed = TRUE;
break;
}
thread_update_array[thread_update_count++] = thread;
thread_reference_internal(thread);
}
} while ((processor = processor->processor_list) != NULL);
while (thread_update_count > 0) {
thread = thread_update_array[--thread_update_count];
thread_update_array[thread_update_count] = THREAD_NULL;
s = splsched();
thread_lock(thread);
if ( !(thread->state & (TH_WAIT|TH_SUSP)) &&
thread->sched_stamp != sched_tick )
update_priority(thread);
thread_unlock(thread);
splx(s);
thread_deallocate(thread);
}
} while (restart_needed);
}
#undef thread_wakeup
void
thread_wakeup(
event_t x);
void
thread_wakeup(
event_t x)
{
thread_wakeup_with_result(x, THREAD_AWAKENED);
}
boolean_t
preemption_enabled(void)
{
return (get_preemption_level() == 0 && ml_get_interrupts_enabled());
}
#if DEBUG
static boolean_t
thread_runnable(
thread_t thread)
{
return ((thread->state & (TH_RUN|TH_WAIT)) == TH_RUN);
}
#endif
#if MACH_KDB
#include <ddb/db_output.h>
#define printf kdbprintf
void db_sched(void);
void
db_sched(void)
{
iprintf("Scheduling Statistics:\n");
db_indent += 2;
iprintf("Thread invocations: csw %d same %d\n",
c_thread_invoke_csw, c_thread_invoke_same);
#if MACH_COUNTERS
iprintf("Thread block: calls %d\n",
c_thread_block_calls);
iprintf("Idle thread:\n\thandoff %d block %d\n",
c_idle_thread_handoff,
c_idle_thread_block);
iprintf("Sched thread blocks: %d\n", c_sched_thread_block);
#endif
db_indent -= 2;
}
#include <ddb/db_output.h>
void db_show_thread_log(void);
void
db_show_thread_log(void)
{
}
#endif