#include <mach/boolean.h>
#include <mach/thread_switch.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>
#include <kern/ipc_kobject.h>
#include <kern/processor.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/spl.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/ast.h>
#include <mach/policy.h>
#include <kern/syscall_subr.h>
#include <mach/mach_host_server.h>
#include <mach/mach_syscalls.h>
#include <kern/mk_sp.h>
#include <kern/misc_protos.h>
#include <kern/spl.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/assert.h>
#include <kern/thread.h>
#include <mach/mach_host_server.h>
#include <mach/thread_act_server.h>
#include <mach/host_priv_server.h>
#include <sys/kdebug.h>
/*
 *	_mk_sp_thread_unblock:
 *
 *	Make a newly-runnable thread eligible for execution: place it at
 *	the tail of a run queue and reset its quantum and computation
 *	accounting so it starts fresh at next dispatch.
 *
 *	NOTE(review): presumably called with the thread locked at
 *	splsched (thread_setrun requires it) -- confirm against callers.
 */
void
_mk_sp_thread_unblock(
	thread_t			thread)
{
	/* enqueue at the tail of the appropriate run queue */
	thread_setrun(thread, TAIL_Q);

	/* start with a fresh quantum and computation interval */
	thread->current_quantum = 0;
	thread->computation_metered = 0;
	thread->reason = AST_NONE;

	/* trace the make-runnable event with the scheduled priority */
	KERNEL_DEBUG_CONSTANT(
		MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE,
		(int)thread, (int)thread->sched_pri, 0, 0, 0);
}
/*
 *	_mk_sp_thread_done:
 *
 *	Scheduling-policy bookkeeping for a thread coming off the
 *	processor: compute the unused remainder of its quantum, flag
 *	quantum expiration where appropriate, transfer the remainder to
 *	the new thread on a pure handoff, and meter the computation
 *	interval that just ended.  Idle threads are not accounted.
 */
void
_mk_sp_thread_done(
	thread_t			old_thread,
	thread_t			new_thread,
	processor_t			processor)
{
	/* timestamp this dispatch point */
	clock_get_uptime(&processor->last_dispatch);

	if (!(old_thread->state & TH_IDLE)) {
		/*
		 *	Remaining quantum is what is left before quantum_end,
		 *	but only while still in the first quantum of the slice.
		 */
		if (	first_quantum(processor) &&
				processor->quantum_end > processor->last_dispatch	)
			old_thread->current_quantum =
				(processor->quantum_end - processor->last_dispatch);
		else
			old_thread->current_quantum = 0;

		if (!(old_thread->sched_mode & TH_MODE_REALTIME)) {
			/*
			 *	Non-realtime: a leftover smaller than min_std_quantum
			 *	is not worth resuming with -- mark the quantum expired
			 *	and top it up with a fresh standard quantum.
			 */
			if (old_thread->current_quantum < min_std_quantum) {
				old_thread->reason |= AST_QUANTUM;
				old_thread->current_quantum += std_quantum;
			}
		}
		else
		/* realtime: expired exactly when nothing remains */
		if (old_thread->current_quantum == 0)
			old_thread->reason |= AST_QUANTUM;

		/*
		 *	Pure handoff (AST_HANDOFF set, AST_QUANTUM clear): the
		 *	remainder of the quantum is donated to the new thread.
		 */
		if ((old_thread->reason & (AST_HANDOFF|AST_QUANTUM)) == AST_HANDOFF) {
			new_thread->current_quantum = old_thread->current_quantum;
			old_thread->reason |= AST_QUANTUM;
			old_thread->current_quantum = 0;
		}

		/* accumulate the computation interval that just ended */
		old_thread->last_switch = processor->last_dispatch;
		old_thread->computation_metered +=
			(old_thread->last_switch - old_thread->computation_epoch);
	}
}
/*
 *	_mk_sp_thread_begin:
 *
 *	Set up quantum timing as a thread begins execution on a
 *	processor: grant a fresh quantum if none remains, arm the
 *	per-processor quantum timer, and open a new computation
 *	interval.  Idle threads run untimed.
 */
void
_mk_sp_thread_begin(
	thread_t			thread,
	processor_t			processor)
{
	if (!(thread->state & TH_IDLE)) {
		/* grant a fresh quantum if the thread has none left */
		if (thread->current_quantum == 0)
			thread->current_quantum =
				(thread->sched_mode & TH_MODE_REALTIME)?
					thread->realtime.computation: std_quantum;

		/* arm the quantum-expiration timer for this dispatch */
		processor->quantum_end =
			(processor->last_dispatch + thread->current_quantum);
		timer_call_enter1(&processor->quantum_timer,
							thread, processor->quantum_end);

		/* timesharing threads get the pset's slice count; others one */
		processor->slice_quanta =
			(thread->sched_mode & TH_MODE_TIMESHARE)?
				processor->processor_set->set_quanta: 1;

		/* open a new computation interval starting now */
		thread->last_switch = processor->last_dispatch;
		thread->computation_epoch = thread->last_switch;
	}
	else {
		/* idle thread: no quantum timing */
		timer_call_cancel(&processor->quantum_timer);

		processor->slice_quanta = 1;
	}
}
/*
 *	_mk_sp_thread_dispatch:
 *
 *	Return a still-runnable thread to the run queues.  A thread whose
 *	quantum expired goes to the tail of the queue; otherwise it keeps
 *	its place at the head.  The dispatch reason is consumed here.
 */
void
_mk_sp_thread_dispatch(
	thread_t		thread)
{
	int			qplacement;

	/* expired quantum => back of the line; otherwise front */
	qplacement = (thread->reason & AST_QUANTUM)? TAIL_Q: HEAD_Q;
	thread_setrun(thread, qplacement);

	thread->reason = AST_NONE;
}
/*
 *	thread_policy_common:
 *
 *	Set the scheduling mode (timeshare or not) and base priority of a
 *	thread.  Threads that are realtime -- or will return to realtime
 *	when an active failsafe releases -- are left untouched.  The
 *	requested priority is converted to a task-relative importance and
 *	clamped to [MINPRI, thread->max_priority].
 *
 *	Returns KERN_INVALID_ARGUMENT for a null thread or bad policy,
 *	otherwise KERN_SUCCESS (even when the realtime guard skips the
 *	update).
 */
static kern_return_t
thread_policy_common(
	thread_t		thread,
	integer_t		policy,
	integer_t		priority)
{
	spl_t			s;

	if (	thread == THREAD_NULL		||
			invalid_policy(policy)		)
		return(KERN_INVALID_ARGUMENT);

	s = splsched();
	thread_lock(thread);

	if (	!(thread->sched_mode & TH_MODE_REALTIME)	&&
			!(thread->safe_mode & TH_MODE_REALTIME)		) {
		/*
		 *	Under an active failsafe the new mode is parked in
		 *	safe_mode and applied when the failsafe releases
		 *	(see update_priority); otherwise apply it now.
		 */
		if (!(thread->sched_mode & TH_MODE_FAILSAFE)) {
			if (policy == POLICY_TIMESHARE)
				thread->sched_mode |= TH_MODE_TIMESHARE;
			else
				thread->sched_mode &= ~TH_MODE_TIMESHARE;
		}
		else {
			if (policy == POLICY_TIMESHARE)
				thread->safe_mode |= TH_MODE_TIMESHARE;
			else
				thread->safe_mode &= ~TH_MODE_TIMESHARE;
		}

		/*
		 *	Convert the absolute priority into an offset from the
		 *	base of its band.
		 *	NOTE(review): the first branch subtracts task_priority
		 *	rather than a band base -- confirm this asymmetry is
		 *	intentional (it makes the later += task_priority a no-op
		 *	net of max_priority).
		 */
		if (priority >= thread->max_priority)
			priority = thread->max_priority - thread->task_priority;
		else
		if (priority >= MINPRI_KERNEL)
			priority -= MINPRI_KERNEL;
		else
		if (priority >= MINPRI_SYSTEM)
			priority -= MINPRI_SYSTEM;
		else
			priority -= BASEPRI_DEFAULT;

		/* rebase on the task and clamp to [MINPRI, max_priority] */
		priority += thread->task_priority;

		if (priority > thread->max_priority)
			priority = thread->max_priority;
		else
		if (priority < MINPRI)
			priority = MINPRI;

		/* record importance relative to the task, then apply */
		thread->importance = priority - thread->task_priority;

		set_priority(thread, priority);
	}

	thread_unlock(thread);
	splx(s);

	return (KERN_SUCCESS);
}
/*
 *	thread_set_policy:
 *
 *	Validate a (policy, base, limit) triple for a thread and apply it
 *	via thread_policy_common().  The thread must currently belong to
 *	the given processor set.
 *
 *	NOTE(review): the limit's max_priority is range-checked but
 *	otherwise unused -- only the base priority reaches
 *	thread_policy_common().
 */
kern_return_t
thread_set_policy(
	thread_act_t			thr_act,
	processor_set_t			pset,
	policy_t				policy,
	policy_base_t			base,
	mach_msg_type_number_t	base_count,
	policy_limit_t			limit,
	mach_msg_type_number_t	limit_count)
{
	thread_t				thread;
	int						max, bas;
	kern_return_t			result = KERN_SUCCESS;

	if (	thr_act == THR_ACT_NULL		||
			pset == PROCESSOR_SET_NULL	)
		return (KERN_INVALID_ARGUMENT);

	thread = act_lock_thread(thr_act);
	if (thread == THREAD_NULL) {
		act_unlock_thread(thr_act);

		return(KERN_INVALID_ARGUMENT);
	}

	/* the caller's pset must match the thread's current pset */
	if (pset != thread->processor_set) {
		act_unlock_thread(thr_act);

		return(KERN_FAILURE);
	}

	/*
	 *	Per-policy unpacking: verify the counts, then extract and
	 *	range-check the base and maximum priorities.
	 */
	switch (policy) {

	case POLICY_RR:
	{
		policy_rr_base_t		rr_base = (policy_rr_base_t) base;
		policy_rr_limit_t		rr_limit = (policy_rr_limit_t) limit;

		if (	base_count != POLICY_RR_BASE_COUNT		||
				limit_count != POLICY_RR_LIMIT_COUNT	) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		bas = rr_base->base_priority;
		max = rr_limit->max_priority;
		if (invalid_pri(bas) || invalid_pri(max)) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}
		break;
	}

	case POLICY_FIFO:
	{
		policy_fifo_base_t		fifo_base = (policy_fifo_base_t) base;
		policy_fifo_limit_t		fifo_limit = (policy_fifo_limit_t) limit;

		if (	base_count != POLICY_FIFO_BASE_COUNT	||
				limit_count != POLICY_FIFO_LIMIT_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		bas = fifo_base->base_priority;
		max = fifo_limit->max_priority;
		if (invalid_pri(bas) || invalid_pri(max)) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}
		break;
	}

	case POLICY_TIMESHARE:
	{
		policy_timeshare_base_t		ts_base = (policy_timeshare_base_t) base;
		policy_timeshare_limit_t	ts_limit =
						(policy_timeshare_limit_t) limit;

		if (	base_count != POLICY_TIMESHARE_BASE_COUNT	||
				limit_count != POLICY_TIMESHARE_LIMIT_COUNT	) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		bas = ts_base->base_priority;
		max = ts_limit->max_priority;
		if (invalid_pri(bas) || invalid_pri(max)) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}
		break;
	}

	default:
		result = KERN_INVALID_POLICY;
	}

	if (result != KERN_SUCCESS) {
		act_unlock_thread(thr_act);

		return(result);
	}

	result = thread_policy_common(thread, policy, bas);
	act_unlock_thread(thr_act);

	return(result);
}
/*
 *	thread_policy:
 *
 *	Set the scheduling policy of a thread from its base.  When
 *	set_limit is TRUE the limit is raised to match the requested base
 *	priority; otherwise the base must not exceed the thread's current
 *	maximum, which becomes the limit.  The validated request is then
 *	forwarded to thread_set_policy().
 *
 *	Returns:
 *		KERN_INVALID_ARGUMENT	null act/thread/pset or bad count
 *		KERN_INVALID_POLICY		unsupported policy
 *		KERN_POLICY_LIMIT		base exceeds the current maximum
 */
kern_return_t
thread_policy(
	thread_act_t			thr_act,
	policy_t				policy,
	policy_base_t			base,
	mach_msg_type_number_t	count,
	boolean_t				set_limit)
{
	thread_t				thread;
	processor_set_t			pset;
	kern_return_t			result = KERN_SUCCESS;
	policy_limit_t			limit;
	int						limcount;
	policy_rr_limit_data_t			rr_limit;
	policy_fifo_limit_data_t		fifo_limit;
	policy_timeshare_limit_data_t	ts_limit;

	if (thr_act == THR_ACT_NULL)
		return (KERN_INVALID_ARGUMENT);

	thread = act_lock_thread(thr_act);

	/*
	 *	Fix: test for THREAD_NULL *before* dereferencing the thread.
	 *	The previous code read thread->processor_set first, which is a
	 *	null-pointer dereference for an activation with no shuttle.
	 */
	if (thread == THREAD_NULL) {
		act_unlock_thread(thr_act);

		return(KERN_INVALID_ARGUMENT);
	}

	pset = thread->processor_set;
	if (pset == PROCESSOR_SET_NULL) {
		act_unlock_thread(thr_act);

		return(KERN_INVALID_ARGUMENT);
	}

	if (	invalid_policy(policy)											||
			((POLICY_TIMESHARE | POLICY_RR | POLICY_FIFO) & policy) == 0	) {
		act_unlock_thread(thr_act);

		return(KERN_INVALID_POLICY);
	}

	if (set_limit) {
		/*
		 *	Raise the limit to match the requested base priority.
		 */
		switch (policy) {

		case POLICY_RR:
		{
			policy_rr_base_t		rr_base;

			if (count != POLICY_RR_BASE_COUNT) {
				result = KERN_INVALID_ARGUMENT;
				break;
			}

			limcount = POLICY_RR_LIMIT_COUNT;
			rr_base = (policy_rr_base_t) base;
			rr_limit.max_priority = rr_base->base_priority;
			limit = (policy_limit_t) &rr_limit;
			break;
		}

		case POLICY_FIFO:
		{
			policy_fifo_base_t		fifo_base;

			if (count != POLICY_FIFO_BASE_COUNT) {
				result = KERN_INVALID_ARGUMENT;
				break;
			}

			limcount = POLICY_FIFO_LIMIT_COUNT;
			fifo_base = (policy_fifo_base_t) base;
			fifo_limit.max_priority = fifo_base->base_priority;
			limit = (policy_limit_t) &fifo_limit;
			break;
		}

		case POLICY_TIMESHARE:
		{
			policy_timeshare_base_t		ts_base;

			if (count != POLICY_TIMESHARE_BASE_COUNT) {
				result = KERN_INVALID_ARGUMENT;
				break;
			}

			limcount = POLICY_TIMESHARE_LIMIT_COUNT;
			ts_base = (policy_timeshare_base_t) base;
			ts_limit.max_priority = ts_base->base_priority;
			limit = (policy_limit_t) &ts_limit;
			break;
		}

		default:
			result = KERN_INVALID_POLICY;
			break;
		}
	}
	else {
		/*
		 *	Keep the thread's current maximum as the limit; the
		 *	requested base must not exceed it.
		 */
		switch (policy) {

		case POLICY_RR:
		{
			policy_rr_base_t		rr_base;

			if (count != POLICY_RR_BASE_COUNT) {
				result = KERN_INVALID_ARGUMENT;
				break;
			}

			limcount = POLICY_RR_LIMIT_COUNT;
			rr_base = (policy_rr_base_t) base;
			if (rr_base->base_priority > thread->max_priority) {
				result = KERN_POLICY_LIMIT;
				break;
			}

			rr_limit.max_priority = thread->max_priority;
			limit = (policy_limit_t) &rr_limit;
			break;
		}

		case POLICY_FIFO:
		{
			policy_fifo_base_t		fifo_base;

			if (count != POLICY_FIFO_BASE_COUNT) {
				result = KERN_INVALID_ARGUMENT;
				break;
			}

			limcount = POLICY_FIFO_LIMIT_COUNT;
			fifo_base = (policy_fifo_base_t) base;
			if (fifo_base->base_priority > thread->max_priority) {
				result = KERN_POLICY_LIMIT;
				break;
			}

			fifo_limit.max_priority = thread->max_priority;
			limit = (policy_limit_t) &fifo_limit;
			break;
		}

		case POLICY_TIMESHARE:
		{
			policy_timeshare_base_t		ts_base;

			if (count != POLICY_TIMESHARE_BASE_COUNT) {
				result = KERN_INVALID_ARGUMENT;
				break;
			}

			limcount = POLICY_TIMESHARE_LIMIT_COUNT;
			ts_base = (policy_timeshare_base_t) base;
			if (ts_base->base_priority > thread->max_priority) {
				result = KERN_POLICY_LIMIT;
				break;
			}

			ts_limit.max_priority = thread->max_priority;
			limit = (policy_limit_t) &ts_limit;
			break;
		}

		default:
			result = KERN_INVALID_POLICY;
			break;
		}
	}

	act_unlock_thread(thr_act);

	if (result == KERN_SUCCESS)
		result = thread_set_policy(thr_act, pset,
								   policy, base, count, limit, limcount);

	return(result);
}
/*
 *	wait_shift:
 *
 *	Usage-decay weights indexed by the number of sched_tick periods a
 *	thread has gone without a priority update.  update_priority()
 *	combines the two shifts: a positive shift2 means
 *	usage = (usage >> shift1) + (usage >> shift2); a negative shift2
 *	means usage = (usage >> shift1) - (usage >> -shift2).
 *	NOTE(review): presumably approximates repeated multiplication by
 *	the classic Mach 5/8 decay factor -- confirm against scheduler
 *	documentation.
 */
shift_data_t	wait_shift[32] = {
	{1,1},{1,3},{1,-3},{2,-7},{3,5},{3,-5},{4,-8},{5,7},
	{5,-7},{6,-10},{7,10},{7,-9},{8,-11},{9,12},{9,-11},{10,-13},
	{11,14},{11,-13},{12,-15},{13,17},{13,-15},{14,-17},{15,19},{16,18},
	{16,-19},{17,22},{18,20},{18,-20},{19,26},{20,22},{20,-22},{21,-27}};
/*
 *	do_priority_computation:
 *
 *	Compute the usage-decayed priority of a timesharing thread: the
 *	base priority less a penalty derived from the scaled scheduler
 *	usage, clamped to [MINPRI_STANDARD, MAXPRI_STANDARD].  When
 *	PRI_SHIFT_2 is defined, a second correction term is applied; its
 *	compile-time sign selects an extra penalty (> 0) or a partial
 *	refund (<= 0).
 */
#ifdef PRI_SHIFT_2
#if PRI_SHIFT_2 > 0
/* two penalty terms: usage >> (PRI_SHIFT+SCHED_SHIFT) and usage >> (PRI_SHIFT_2+SCHED_SHIFT) */
#define do_priority_computation(thread, pri) \
	MACRO_BEGIN \
	(pri) = (thread)->priority \
		- ((thread)->sched_usage >> (PRI_SHIFT + SCHED_SHIFT)) \
		- ((thread)->sched_usage >> (PRI_SHIFT_2 + SCHED_SHIFT)); \
	if ((pri) < MINPRI_STANDARD) \
		(pri) = MINPRI_STANDARD; \
	else \
	if ((pri) > MAXPRI_STANDARD) \
		(pri) = MAXPRI_STANDARD; \
	MACRO_END
#else
/* PRI_SHIFT_2 <= 0: the second term is added back as a partial refund */
#define do_priority_computation(thread, pri) \
	MACRO_BEGIN \
	(pri) = (thread)->priority \
		- ((thread)->sched_usage >> (PRI_SHIFT + SCHED_SHIFT)) \
		+ ((thread)->sched_usage >> (SCHED_SHIFT - PRI_SHIFT_2)); \
	if ((pri) < MINPRI_STANDARD) \
		(pri) = MINPRI_STANDARD; \
	else \
	if ((pri) > MAXPRI_STANDARD) \
		(pri) = MAXPRI_STANDARD; \
	MACRO_END
#endif
#else
/* single penalty term: usage >> (PRI_SHIFT+SCHED_SHIFT) */
#define do_priority_computation(thread, pri) \
	MACRO_BEGIN \
	(pri) = (thread)->priority \
		- ((thread)->sched_usage >> (PRI_SHIFT + SCHED_SHIFT)); \
	if ((pri) < MINPRI_STANDARD) \
		(pri) = MINPRI_STANDARD; \
	else \
	if ((pri) > MAXPRI_STANDARD) \
		(pri) = MAXPRI_STANDARD; \
	MACRO_END
#endif
/*
 *	set_priority:
 *
 *	Set the base priority of a thread and recompute its scheduled
 *	priority from it.
 *
 *	NOTE(review): presumably called with the thread locked at
 *	splsched (see thread_policy_common) -- confirm against callers.
 */
void
set_priority(
	register thread_t	thread,
	register int		priority)
{
	thread->priority = priority;
	compute_priority(thread, FALSE);
}
/*
 *	compute_priority:
 *
 *	Recompute a thread's scheduled priority: usage-decayed for
 *	timesharing threads, the base priority otherwise.  Promoted
 *	threads are never disturbed; depressed threads are updated only
 *	when override_depress is TRUE.
 */
void
compute_priority(
	register thread_t	thread,
	boolean_t			override_depress)
{
	register int		pri;

	/* never disturb a promoted thread's priority */
	if (thread->sched_mode & TH_MODE_PROMOTED)
		return;

	/* leave a depressed thread alone unless explicitly overridden */
	if ((thread->sched_mode & TH_MODE_ISDEPRESSED) && !override_depress)
		return;

	if (thread->sched_mode & TH_MODE_TIMESHARE)
		do_priority_computation(thread, pri);
	else
		pri = thread->priority;

	set_sched_pri(thread, pri);
}
/*
 *	compute_my_priority:
 *
 *	Usage-decayed priority recomputation for the calling thread.  The
 *	thread must not be sitting on a run queue; the result is stored
 *	directly into sched_pri.
 */
void
compute_my_priority(
	register thread_t	thread)
{
	register int		new_pri;

	do_priority_computation(thread, new_pri);
	assert(thread->runq == RUN_QUEUE_NULL);
	thread->sched_pri = new_pri;
}
/*
 *	update_priority:
 *
 *	Age the cpu/scheduler usage of a thread that has missed one or
 *	more sched_tick updates, release an expired failsafe, and -- for
 *	an ordinary timesharing thread -- recompute the scheduled
 *	priority, requeueing if it changed.
 *
 *	NOTE(review): presumably called with the thread locked at
 *	splsched -- confirm against callers.
 */
void
update_priority(
	register thread_t		thread)
{
	register unsigned int	ticks;
	register shift_t		shiftp;

	ticks = sched_tick - thread->sched_stamp;
	assert(ticks != 0);
	thread->sched_stamp += ticks;
	thread_timer_delta(thread);

	/*
	 *	Idle past the end of the decay table: usage has decayed to
	 *	nothing.
	 */
	if (ticks > 30) {
		thread->cpu_usage = 0;
		thread->sched_usage = 0;
	}
	else {
		/*
		 *	Fold in activity since the last update, then decay by the
		 *	table entry for this many ticks: a positive shift2 adds a
		 *	second (smaller) term, a negative shift2 subtracts one.
		 */
		thread->cpu_usage += thread->cpu_delta;
		thread->sched_usage += thread->sched_delta;

		shiftp = &wait_shift[ticks];
		if (shiftp->shift2 > 0) {
			thread->cpu_usage =
						(thread->cpu_usage >> shiftp->shift1) +
						(thread->cpu_usage >> shiftp->shift2);
			thread->sched_usage =
						(thread->sched_usage >> shiftp->shift1) +
						(thread->sched_usage >> shiftp->shift2);
		}
		else {
			thread->cpu_usage =
						(thread->cpu_usage >> shiftp->shift1) -
						(thread->cpu_usage >> -(shiftp->shift2));
			thread->sched_usage =
						(thread->sched_usage >> shiftp->shift1) -
						(thread->sched_usage >> -(shiftp->shift2));
		}
	}
	thread->cpu_delta = 0;
	thread->sched_delta = 0;

	/*
	 *	An active failsafe has run its course: restore the parked
	 *	scheduling mode (re-promote to realtime if that is what was
	 *	demoted).
	 */
	if (	(thread->sched_mode & TH_MODE_FAILSAFE)		&&
			thread->sched_stamp >= thread->safe_release	) {
		if (!(thread->safe_mode & TH_MODE_TIMESHARE)) {
			if (thread->safe_mode & TH_MODE_REALTIME) {
				thread->priority = BASEPRI_REALTIME;

				thread->sched_mode |= TH_MODE_REALTIME;
			}

			thread->sched_mode &= ~TH_MODE_TIMESHARE;

			/* don't undo an active depression */
			if (!(thread->sched_mode & TH_MODE_ISDEPRESSED))
				set_sched_pri(thread, thread->priority);
		}

		thread->safe_mode = 0;
		thread->sched_mode &= ~TH_MODE_FAILSAFE;
	}

	/*
	 *	Recompute the scheduled priority of an ordinary timesharing
	 *	thread (neither promoted nor depressed); if it moved and the
	 *	thread is queued, requeue at the new priority.
	 */
	if (	(thread->sched_mode & TH_MODE_TIMESHARE)	&&
			!(thread->sched_mode & TH_MODE_PROMOTED)	&&
			!(thread->sched_mode & TH_MODE_ISDEPRESSED)	) {
		register int		new_pri;

		do_priority_computation(thread, new_pri);
		if (new_pri != thread->sched_pri) {
			run_queue_t		runq;

			runq = rem_runq(thread);
			thread->sched_pri = new_pri;
			if (runq != RUN_QUEUE_NULL)
				thread_setrun(thread, TAIL_Q);
		}
	}
}
/*
 *	_mk_sp_thread_switch_continue:
 *
 *	Continuation for _mk_sp_thread_switch(): undo whatever wait timer
 *	or priority depression was set up before blocking, then return
 *	KERN_SUCCESS to the caller of the thread_switch trap.
 */
void
_mk_sp_thread_switch_continue(void)
{
	register thread_t	self = current_thread();
	int					wait_result = self->wait_result;
	int					option = self->saved.swtch.option;

	switch (option) {

	case SWITCH_OPTION_WAIT:
		/* the timer already fired if the wait timed out */
		if (wait_result != THREAD_TIMED_OUT)
			thread_cancel_timer();
		break;

	case SWITCH_OPTION_DEPRESS:
		_mk_sp_thread_depress_abort(self, FALSE);
		break;

	default:
		break;
	}

	thread_syscall_return(KERN_SUCCESS);
	/*NOTREACHED*/
}
/*
 *	_mk_sp_thread_switch:
 *
 *	Implementation of the thread_switch trap: optionally hand the
 *	processor directly to the hinted activation's thread; otherwise
 *	yield if anything else is runnable.  SWITCH_OPTION_WAIT arms a
 *	wakeup timer for option_time; SWITCH_OPTION_DEPRESS depresses the
 *	caller's priority for option_time (both in milliseconds).
 */
kern_return_t
_mk_sp_thread_switch(
	thread_act_t			hint_act,
	int						option,
	mach_msg_timeout_t		option_time)
{
	register thread_t		self = current_thread();
	register processor_t	myprocessor;
	int						s;

	/*
	 *	Try a direct handoff to the hinted thread.
	 */
	if (hint_act != THR_ACT_NULL) {
		register thread_t		thread = act_lock_thread(hint_act);

		if (	thread != THREAD_NULL		&&
				thread != self				&&
				thread->top_act == hint_act	) {
			s = splsched();
			thread_lock(thread);

			/*
			 *	The hinted thread must be in our pset and currently on
			 *	a run queue (rem_runq both tests and dequeues it).
			 */
			if (	thread->processor_set == self->processor_set	&&
					rem_runq(thread) != RUN_QUEUE_NULL				) {
				thread_unlock(thread);

				act_unlock_thread(hint_act);
				act_deallocate(hint_act);

				if (option == SWITCH_OPTION_WAIT)
					assert_wait_timeout(option_time, THREAD_ABORTSAFE);
				else
				if (option == SWITCH_OPTION_DEPRESS)
					_mk_sp_thread_depress_ms(option_time);

				self->saved.swtch.option = option;

				/*
				 *	Hand off; resumes in the continuation rather than
				 *	here.  NOTE(review): if thread_run can return on a
				 *	failed handoff, control falls into thread_unlock
				 *	below on a thread already unlocked and deallocated
				 *	above -- confirm thread_run's contract.
				 */
				thread_run(self, _mk_sp_thread_switch_continue, thread);
			}

			thread_unlock(thread);
			splx(s);
		}

		act_unlock_thread(hint_act);
		act_deallocate(hint_act);
	}

	/*
	 *	No usable hint: block if the option requires it, or if any
	 *	other thread is waiting for this processor.
	 */
	mp_disable_preemption();
	myprocessor = current_processor();
	if (	option != SWITCH_OPTION_NONE				||
			myprocessor->processor_set->runq.count > 0	||
			myprocessor->runq.count > 0					) {
		mp_enable_preemption();

		if (option == SWITCH_OPTION_WAIT)
			assert_wait_timeout(option_time, THREAD_ABORTSAFE);
		else
		if (option == SWITCH_OPTION_DEPRESS)
			_mk_sp_thread_depress_ms(option_time);

		self->saved.swtch.option = option;

		thread_block_reason(_mk_sp_thread_switch_continue,
								(option == SWITCH_OPTION_DEPRESS)?
										AST_YIELD: AST_NONE);
	}
	else
		mp_enable_preemption();

/* NOTE(review): label is unused -- no goto targets it in this view */
out:

	/*
	 *	Reached when no block occurred above (after a block, the
	 *	continuation performs the equivalent cleanup instead).
	 */
	if (option == SWITCH_OPTION_WAIT)
		thread_cancel_timer();
	else
	if (option == SWITCH_OPTION_DEPRESS)
		_mk_sp_thread_depress_abort(self, FALSE);

	return (KERN_SUCCESS);
}
/*
 *	_mk_sp_thread_depress_abstime:
 *
 *	Depress the current thread's scheduled priority to DEPRESSPRI.
 *	A zero interval depresses until explicitly aborted; a nonzero
 *	interval arms a timer (thread_depress_expire) to restore the
 *	priority.  No-op if a depression is already in effect.
 */
void
_mk_sp_thread_depress_abstime(
	uint64_t				interval)
{
	register thread_t		self = current_thread();
	uint64_t				deadline;
	spl_t					s;

	s = splsched();
	wake_lock(self);
	thread_lock(self);
	if (!(self->sched_mode & TH_MODE_ISDEPRESSED)) {
		processor_t		myprocessor = self->last_processor;

		/* drop to the depressed priority and give up preemption rights */
		self->sched_pri = DEPRESSPRI;
		myprocessor->current_pri = self->sched_pri;
		self->sched_mode &= ~TH_MODE_PREEMPT;
		self->sched_mode |= TH_MODE_DEPRESS;
		thread_unlock(self);

		if (interval != 0) {
			clock_absolutetime_interval_to_deadline(interval, &deadline);
			/* count the pending callout unless one was already armed */
			if (!timer_call_enter(&self->depress_timer, deadline))
				self->depress_timer_active++;
		}
	}
	else
		thread_unlock(self);
	wake_unlock(self);
	splx(s);
}
/*
 *	_mk_sp_thread_depress_ms:
 *
 *	Depress the current thread's priority for an interval expressed
 *	in milliseconds: convert to absolute time units and defer to
 *	_mk_sp_thread_depress_abstime().
 */
void
_mk_sp_thread_depress_ms(
	mach_msg_timeout_t		interval)
{
	uint64_t		abs_interval;

	/* milliseconds -> absolute time units (1 ms == 1000 us) */
	clock_interval_to_absolutetime_interval(
						interval, 1000*NSEC_PER_USEC, &abs_interval);

	_mk_sp_thread_depress_abstime(abs_interval);
}
/*
 *	thread_depress_expire:
 *
 *	Timer callout: the depression interval has elapsed, so clear the
 *	depressed state and restore the thread's computed priority.
 *	depress_timer_active reference-counts outstanding timer activity;
 *	reaching zero wakes any waiter on the counter.
 *	NOTE(review): the exact counting protocol (why a post-decrement
 *	value of 1 means "safe to restore") is inferred from this file
 *	only -- confirm against the timer arm/cancel sites.
 */
void
thread_depress_expire(
	timer_call_param_t		p0,
	timer_call_param_t		p1)
{
	thread_t		thread = p0;
	spl_t			s;

	s = splsched();
	wake_lock(thread);
	if (--thread->depress_timer_active == 1) {
		thread_lock(thread);
		thread->sched_mode &= ~TH_MODE_ISDEPRESSED;
		compute_priority(thread, FALSE);
		thread_unlock(thread);
	}
	else
	if (thread->depress_timer_active == 0)
		thread_wakeup_one(&thread->depress_timer_active);
	wake_unlock(thread);
	splx(s);
}
/*
 *	_mk_sp_thread_depress_abort:
 *
 *	Prematurely end a thread's priority depression and cancel its
 *	pending depress timer.  Poll-induced depressions
 *	(TH_MODE_POLLDEPRESS) are only aborted when abortall is TRUE.
 *
 *	Returns KERN_SUCCESS if a depression was undone, else
 *	KERN_NOT_DEPRESSED.
 */
kern_return_t
_mk_sp_thread_depress_abort(
	register thread_t		thread,
	boolean_t				abortall)
{
	kern_return_t			result = KERN_NOT_DEPRESSED;
	spl_t					s;

	s = splsched();
	wake_lock(thread);
	thread_lock(thread);
	if (abortall || !(thread->sched_mode & TH_MODE_POLLDEPRESS)) {
		if (thread->sched_mode & TH_MODE_ISDEPRESSED) {
			thread->sched_mode &= ~TH_MODE_ISDEPRESSED;
			compute_priority(thread, FALSE);
			result = KERN_SUCCESS;
		}
		thread_unlock(thread);

		/* drop the timer reference if the callout had not yet fired */
		if (timer_call_cancel(&thread->depress_timer))
			thread->depress_timer_active--;
	}
	else
		thread_unlock(thread);
	wake_unlock(thread);
	splx(s);

	return (result);
}
/*
 *	_mk_sp_thread_perhaps_yield:
 *
 *	Throttle a thread that is neither realtime nor timesharing and
 *	has been computing longer than max_poll_computation: depress its
 *	priority, arm a self-releasing depress timer proportional to the
 *	computation interval, and check whether a preemption is due.
 */
void
_mk_sp_thread_perhaps_yield(
	thread_t		self)
{
	spl_t			s;

	assert(self == current_thread());

	s = splsched();
	if (!(self->sched_mode & (TH_MODE_REALTIME|TH_MODE_TIMESHARE))) {
		extern uint64_t		max_poll_computation;
		extern int			sched_poll_yield_shift;
		uint64_t			abstime, total_computation;

		/* total = time since computation_epoch plus metered history */
		clock_get_uptime(&abstime);
		total_computation = abstime - self->computation_epoch;
		total_computation += self->computation_metered;
		if (total_computation >= max_poll_computation) {
			processor_t		myprocessor = current_processor();
			ast_t			preempt;

			wake_lock(self);
			thread_lock(self);

			/* depress unless some depression is already in effect */
			if (!(self->sched_mode & TH_MODE_ISDEPRESSED)) {
				self->sched_pri = DEPRESSPRI;
				myprocessor->current_pri = self->sched_pri;
				self->sched_mode &= ~TH_MODE_PREEMPT;
			}

			/* restart computation metering from now */
			self->computation_epoch = abstime;
			self->computation_metered = 0;
			self->sched_mode |= TH_MODE_POLLDEPRESS;
			thread_unlock(self);

			/* depression lasts a fraction of the computation interval */
			abstime += (total_computation >> sched_poll_yield_shift);
			if (!timer_call_enter(&self->depress_timer, abstime))
				self->depress_timer_active++;
			wake_unlock(self);

			if ((preempt = csw_check(self, myprocessor)) != AST_NONE)
				ast_on(preempt);
		}
	}
	splx(s);
}