#include <platforms.h>
#include <mach/exception_types.h>
#include <mach/i386/thread_status.h>
#include <mach/i386/fp_reg.h>
#include <kern/mach_param.h>
#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/misc_protos.h>
#include <kern/spl.h>
#include <kern/assert.h>
#include <i386/thread.h>
#include <i386/fpu.h>
#include <i386/trap.h>
#include <architecture/i386/pio.h>
#include <i386/cpuid.h>
#include <i386/misc_protos.h>
#include <i386/proc_reg.h>
/* Kind of FPU detected; FP_NO until init_fpu() probes the hardware. */
int fp_kind = FP_NO;

/* Zone from which per-thread x86_fpsave_state areas are allocated. */
zone_t ifps_zone;

/* True iff addr is aligned to a size-byte boundary (size a power of two). */
#define ALIGNED(addr,size) (((unsigned)(addr)&((size)-1))==0)

/* Forward declarations. */
extern void fpinit(void);
extern void fp_save(
	thread_t thr_act);
extern void fp_load(
	thread_t thr_act);

static void configure_mxcsr_capability_mask(struct x86_fpsave_state *ifps);

/* Pristine FPU image captured at boot by init_fpu(); copied out for
 * threads that have no FPU state of their own yet. */
struct x86_fpsave_state starting_fp_state;

/* Mask of MXCSR bits this CPU actually implements; user-supplied MXCSR
 * values are ANDed with this before being loaded (see fpu_set_fxstate). */
static unsigned int mxcsr_capability_mask;
/*
 * Determine the MXCSR capability mask, which allows us to mask off any
 * potentially unsafe "reserved" bits before restoring a user-supplied
 * MXCSR.  Called once at module init with a scratch save area.
 */
static void
configure_mxcsr_capability_mask(struct x86_fpsave_state *ifps)
{
	/* fxsave requires a 16-byte-aligned destination. */
	assert(ALIGNED(ifps,16));

	bzero(ifps, sizeof(*ifps));

	/* Clear CR0.TS so the fxsave below does not fault with #NM. */
	clear_ts();
	__asm__ volatile("fxsave %0" : "=m" (ifps->fx_save_state));

	mxcsr_capability_mask = ifps->fx_save_state.fx_MXCSR_MASK;

	/*
	 * Per the Intel SDM, a saved MXCSR_MASK of zero means the CPU
	 * provides no mask and the default 0xffbf must be assumed.
	 */
	if (mxcsr_capability_mask == 0)
		mxcsr_capability_mask = 0xffbf;

	/* Re-arm lazy FPU switching. */
	set_ts();
}
/*
 * Allocate a zeroed per-thread FPU save area from the zone.
 * The area must be 16-byte aligned for fxsave/fxrstor.
 */
static struct x86_fpsave_state *
fp_state_alloc(void)
{
	struct x86_fpsave_state *fps =
		(struct x86_fpsave_state *)zalloc(ifps_zone);

	assert(ALIGNED(fps, 16));
	bzero((char *)fps, sizeof(*fps));

	return fps;
}
/* Return a per-thread FPU save area to the zone it was allocated from. */
static inline void
fp_state_free(struct x86_fpsave_state *ifps)
{
	zfree(ifps_zone, ifps);
}
/*
 * Probe and initialize the floating-point hardware at boot.
 *
 * Verifies a sane x87 unit is present, requires FXSAVE/FXRSTOR support,
 * enables SSE exception handling when available, captures a pristine
 * FPU image into starting_fp_state, and arms lazy FPU switching by
 * setting CR0.TS.  Panics if the hardware does not meet requirements.
 */
void
init_fpu(void)
{
	unsigned short status, control;

	/*
	 * Allow FPU access while probing (clear CR0.EM emulation and
	 * CR0.TS task-switched) and select native error reporting (CR0.NE).
	 */
	set_cr0((get_cr0() & ~(CR0_EM|CR0_TS)) | CR0_NE);

	/*
	 * x87 self-test: after FNINIT the low status byte must read 0 and
	 * the masked control word must read back as 0x003f (all exceptions
	 * masked, reserved/extension bits clear).
	 */
	fninit();
	status = fnstsw();
	fnstcw(&control);
	if ((status & 0xff) == 0 &&
	    (control & 0x103f) == 0x3f)
	{
		/* This code supports only FXSAVE-capable FPUs. */
		if (cpuid_features() & CPUID_FEATURE_FXSR) {
			fp_kind = FP_FXSR;
			set_cr4(get_cr4() | CR4_FXS);	/* enable fxsave/fxrstor */
			if (cpuid_features() & CPUID_FEATURE_SSE) {
				set_cr4(get_cr4() | CR4_XMM);	/* unmasked SIMD FP exceptions */
			}
		} else
			panic("fpu is not FP_FXSR");

		/* Capture a pristine post-fpinit image for new threads. */
		fpinit();
		fxsave(&starting_fp_state.fx_save_state);

		/*
		 * Set TS so the first FP use traps (lazy switching), and MP
		 * so WAIT/FWAIT honors TS as well.
		 */
		set_cr0(get_cr0() | CR0_TS | CR0_MP);
	}
	else
	{
		/*
		 * Distinct message from the FXSR check above: this branch
		 * means the basic x87 self-test itself failed.
		 */
		panic("FPU self-test failed: no usable x87 FPU");
	}
}
/*
 * One-time module initialization: create the zone that backs per-thread
 * FPU save areas, then probe the CPU's MXCSR capability mask using a
 * throwaway save area.
 */
void
fpu_module_init(void)
{
	struct x86_fpsave_state *scratch;

	ifps_zone = zinit(sizeof(struct x86_fpsave_state),
			  THREAD_MAX * sizeof(struct x86_fpsave_state),
			  THREAD_CHUNK * sizeof(struct x86_fpsave_state),
			  "x86 fpsave state");

	scratch = fp_state_alloc();
	configure_mxcsr_capability_mask(scratch);
	fp_state_free(scratch);
}
/* Free a thread's FPU save area (external wrapper around fp_state_free). */
void
fpu_free(struct x86_fpsave_state *fps)
{
	fp_state_free(fps);
}
/*
 * Set the floating-point state for a thread from a user-supplied
 * x86_float_state64_t.  A NULL state discards the thread's FPU state.
 *
 * Returns KERN_FAILURE if no FPU is present, KERN_SUCCESS otherwise.
 */
kern_return_t
fpu_set_fxstate(
	thread_t thr_act,
	thread_state_t tstate)
{
	struct x86_fpsave_state *ifps;
	struct x86_fpsave_state *new_ifps;
	x86_float_state64_t *state;
	pcb_t pcb;

	if (fp_kind == FP_NO)
		return KERN_FAILURE;

	state = (x86_float_state64_t *)tstate;

	assert(thr_act != THREAD_NULL);
	pcb = thr_act->machine.pcb;

	if (state == NULL) {
		/*
		 * No valid state: detach the save area under the pcb lock,
		 * free it after the lock is dropped.
		 */
		simple_lock(&pcb->lock);
		ifps = pcb->ifps;
		pcb->ifps = 0;
		simple_unlock(&pcb->lock);

		if (ifps != 0)
			fp_state_free(ifps);
	} else {
		/*
		 * Ensure the thread has a save area.  We cannot allocate
		 * while holding the pcb lock, so drop it, allocate, and
		 * retry; another path may have installed one meanwhile.
		 */
		new_ifps = 0;
	Retry:
		simple_lock(&pcb->lock);
		ifps = pcb->ifps;
		if (ifps == 0) {
			if (new_ifps == 0) {
				simple_unlock(&pcb->lock);
				new_ifps = fp_state_alloc();
				goto Retry;
			}
			ifps = new_ifps;
			new_ifps = 0;
			pcb->ifps = ifps;
		}

		/* Copy the user state starting at the control word. */
		bcopy((char *)&state->fpu_fcw,
		      (char *)&ifps->fx_save_state, sizeof(struct x86_fx_save));
		ifps->fp_save_layout = thread_is_64bit(thr_act) ? FXSAVE64 : FXSAVE32;
		/* Clamp MXCSR to bits this CPU actually implements. */
		ifps->fx_save_state.fx_MXCSR &= mxcsr_capability_mask;

		simple_unlock(&pcb->lock);

		/* Lost the race above: discard the unused allocation. */
		if (new_ifps != 0)
			fp_state_free(new_ifps);
	}
	return KERN_SUCCESS;
}
/*
 * Copy a thread's floating-point state into a user-visible
 * x86_float_state64_t.  A thread that has never used the FPU gets the
 * pristine boot-time image (starting_fp_state).
 *
 * Returns KERN_FAILURE if no FPU is present or the saved state is not
 * valid, KERN_SUCCESS otherwise.
 */
kern_return_t
fpu_get_fxstate(
	thread_t thr_act,
	thread_state_t tstate)
{
	struct x86_fpsave_state *ifps;
	x86_float_state64_t *state;
	kern_return_t ret = KERN_FAILURE;
	pcb_t pcb;

	if (fp_kind == FP_NO)
		return KERN_FAILURE;

	state = (x86_float_state64_t *)tstate;

	assert(thr_act != THREAD_NULL);
	pcb = thr_act->machine.pcb;

	simple_lock(&pcb->lock);
	ifps = pcb->ifps;
	if (ifps == 0) {
		/* Thread has no FPU state yet: report the initial image. */
		bcopy((char *)&starting_fp_state.fx_save_state,
		      (char *)&state->fpu_fcw, sizeof(struct x86_fx_save));
		simple_unlock(&pcb->lock);
		return KERN_SUCCESS;
	}

	/*
	 * If the state is live in this CPU's registers, flush it to the
	 * save area first (interrupts disabled across the save).
	 */
	if (thr_act == current_thread()) {
		boolean_t intr;

		intr = ml_set_interrupts_enabled(FALSE);

		clear_ts();
		fp_save(thr_act);
		clear_fpu();

		(void)ml_set_interrupts_enabled(intr);
	}

	if (ifps->fp_valid) {
		bcopy((char *)&ifps->fx_save_state,
		      (char *)&state->fpu_fcw, sizeof(struct x86_fx_save));
		ret = KERN_SUCCESS;
	}
	simple_unlock(&pcb->lock);
	return ret;
}
/*
 * Duplicate the parent thread's FPU state into the child (fork path).
 * Does nothing if the parent has no FPU state; panics if the child
 * already has state.
 *
 * NOTE(review): fp_save() fxsaves the *current CPU's* registers into the
 * parent's save area; presumably callers guarantee the parent is the
 * current thread (or its state is already flushed) -- confirm at call site.
 */
void
fpu_dup_fxstate(
	thread_t parent,
	thread_t child)
{
	struct x86_fpsave_state *new_ifps = NULL;
	boolean_t intr;
	pcb_t ppcb;

	ppcb = parent->machine.pcb;

	/* Unlocked early-out; rechecked under the lock below. */
	if (ppcb->ifps == NULL)
		return;

	if (child->machine.pcb->ifps)
		panic("fpu_dup_fxstate: child's ifps non-null");

	/* Allocate before taking the lock (allocation may block). */
	new_ifps = fp_state_alloc();

	simple_lock(&ppcb->lock);

	if (ppcb->ifps != NULL) {
		/* Flush live register state to the parent's save area. */
		intr = ml_set_interrupts_enabled(FALSE);

		clear_ts();
		fp_save(parent);
		clear_fpu();

		(void)ml_set_interrupts_enabled(intr);

		if (ppcb->ifps->fp_valid) {
			child->machine.pcb->ifps = new_ifps;

			bcopy((char *)&(ppcb->ifps->fx_save_state),
			      (char *)&(child->machine.pcb->ifps->fx_save_state), sizeof(struct x86_fx_save));

			new_ifps->fp_save_layout = ppcb->ifps->fp_save_layout;
			/* Mirror fpu_set_fxstate: clamp MXCSR to supported bits. */
			new_ifps->fx_save_state.fx_MXCSR &= mxcsr_capability_mask;

			/* Ownership transferred to the child. */
			new_ifps = NULL;
		}
	}
	simple_unlock(&ppcb->lock);

	/* Parent state vanished or was invalid: discard the allocation. */
	if (new_ifps != NULL)
		fp_state_free(new_ifps);
}
/*
 * Initialize the FPU hardware to a known state: 64-bit precision,
 * round-to-nearest, all x87 exceptions masked, and MXCSR at its
 * documented reset value (0x1f80: all SIMD exceptions masked).
 */
void
fpinit(void)
{
	unsigned short control;

	/* Allow FPU access (clear CR0.TS) before touching the unit. */
	clear_ts();
	fninit();
	fnstcw(&control);
	/* Clear precision/rounding fields, then set the desired modes. */
	control &= ~(FPC_PC|FPC_RC);
	control |= (FPC_PC_64 |		/* full 64-bit precision */
		    FPC_RC_RN |		/* round to nearest */
		    FPC_ZE |		/* mask zero-divide */
		    FPC_OE |		/* mask overflow */
		    FPC_UE |		/* mask underflow */
		    FPC_IE |		/* mask invalid operation */
		    FPC_DE |		/* mask denormal */
		    FPC_PE);		/* mask precision */
	fldcw(control);

	__builtin_ia32_ldmxcsr(0x1f80);	/* MXCSR reset value */
}
/*
 * Handle a coprocessor-not-available fault: the current thread touched
 * the FPU while CR0.TS was set.  Lazily allocate a save area if needed
 * and load the thread's state into the registers.  At interrupt level
 * the thread's state is saved and a clean FPU handed to the interrupt
 * context instead.
 */
void
fpnoextflt(void)
{
	boolean_t intr;
	thread_t thr_act;
	pcb_t pcb;
	struct x86_fpsave_state *ifps = 0;

	thr_act = current_thread();
	pcb = thr_act->machine.pcb;

	/*
	 * Pre-allocate with interrupts still enabled (allocation may
	 * block); only when not already at interrupt level.
	 */
	if (pcb->ifps == 0 && !get_interrupt_level())
		ifps = fp_state_alloc();

	intr = ml_set_interrupts_enabled(FALSE);

	clear_ts();			/* Enable FPU use */

	if (get_interrupt_level()) {
		/* FPU used at interrupt context: save the interrupted
		 * thread's state and present a freshly initialized FPU. */
		fp_save(thr_act);
		fpinit();
	} else {
		if (pcb->ifps == 0) {
			pcb->ifps = ifps;
			ifps = 0;	/* ownership moved to the pcb */
		}
		/* Load the thread's state (or a clean state) into the FPU. */
		fp_load(thr_act);
	}
	(void)ml_set_interrupts_enabled(intr);

	/* Raced with another allocator: discard the spare. */
	if (ifps)
		fp_state_free(ifps);
}
/*
 * Handle an FPU segment-overrun exception for the current thread:
 * discard the thread's FPU state, re-initialize the hardware, and
 * raise EXC_BAD_ACCESS.  Panics if taken at interrupt level or from a
 * kernel thread.
 */
void
fpextovrflt(void)
{
	thread_t thr_act = current_thread();
	pcb_t pcb;
	struct x86_fpsave_state *ifps;
	boolean_t intr;

	intr = ml_set_interrupts_enabled(FALSE);

	if (get_interrupt_level())
		panic("FPU segment overrun exception at interrupt context\n");
	if (current_task() == kernel_task)
		panic("FPU segment overrun exception in kernel thread context\n");

	/*
	 * Detach the thread's save area under the pcb lock; it is freed
	 * after the lock is dropped.
	 */
	pcb = thr_act->machine.pcb;
	simple_lock(&pcb->lock);
	ifps = pcb->ifps;
	pcb->ifps = 0;
	simple_unlock(&pcb->lock);

	/* Re-initialize the hardware and mark no thread as FPU owner. */
	clear_ts();
	fninit();
	clear_fpu();

	(void)ml_set_interrupts_enabled(intr);

	/* Use the common helper (was a raw zfree) for consistency with
	 * every other free path in this file. */
	if (ifps)
		fp_state_free(ifps);

	/* Raise the exception for the thread. */
	i386_exception(EXC_BAD_ACCESS, VM_PROT_READ|VM_PROT_EXECUTE, 0);
}
/*
 * Handle an x87 floating-point error exception for the current thread:
 * flush the register state to the save area, then raise EXC_ARITHMETIC
 * with the saved status word.  Panics if taken at interrupt level or
 * from a kernel thread.
 *
 * NOTE(review): ifps is dereferenced below without a NULL check;
 * presumably an FP error exception implies the thread has used the FPU
 * and therefore has a save area -- confirm.
 */
void
fpexterrflt(void)
{
	thread_t thr_act = current_thread();
	struct x86_fpsave_state *ifps = thr_act->machine.pcb->ifps;
	boolean_t intr;

	intr = ml_set_interrupts_enabled(FALSE);

	if (get_interrupt_level())
		panic("FPU error exception at interrupt context\n");
	if (current_task() == kernel_task)
		panic("FPU error exception in kernel thread context\n");

	/* Save the FPU state so the status word below is current. */
	fp_save(thr_act);

	(void)ml_set_interrupts_enabled(intr);

	/* Raise the exception with the saved x87 status word as code. */
	i386_exception(EXC_ARITHMETIC,
		       EXC_I386_EXTERR,
		       ifps->fx_save_state.fx_status);
}
/*
 * Save the current CPU's FPU register state into thr_act's save area,
 * unless it is already valid there.  Records which fxsave layout was
 * used (32- vs 64-bit) so fp_load() can restore it correctly.
 *
 * Caller must ensure CR0.TS is clear and interrupts are disabled.
 */
void
fp_save(
	thread_t thr_act)
{
	pcb_t pcb = thr_act->machine.pcb;
	struct x86_fpsave_state *ifps = pcb->ifps;

	if (ifps != 0 && !ifps->fp_valid) {
		/* fxsave would fault if TS were set. */
		assert((get_cr0() & CR0_TS) == 0);
		ifps->fp_valid = TRUE;

		if (!thread_is_64bit(thr_act)) {
			fxsave(&ifps->fx_save_state);
			ifps->fp_save_layout = FXSAVE32;
		}
		else {
			fxsave64(&ifps->fx_save_state);
			ifps->fp_save_layout = FXSAVE64;
		}
	}
}
/*
 * Load thr_act's saved FPU state into the CPU registers, allocating a
 * save area and initializing the hardware if the thread has no valid
 * state yet.  On return the live state is in the registers, so the
 * save-area copy is marked invalid.
 *
 * Caller must ensure CR0.TS is clear and interrupts are disabled.
 */
void
fp_load(
	thread_t thr_act)
{
	pcb_t pcb = thr_act->machine.pcb;
	struct x86_fpsave_state *ifps;

	ifps = pcb->ifps;
	if (ifps == 0 || ifps->fp_valid == FALSE) {
		/* No saved state: give the thread a clean FPU. */
		if (ifps == 0) {
			ifps = fp_state_alloc();
			pcb->ifps = ifps;
		}
		fpinit();
	} else {
		/* Restore with the same layout fp_save() recorded. */
		assert(ifps->fp_save_layout == FXSAVE32 || ifps->fp_save_layout == FXSAVE64);
		if (ifps->fp_save_layout == FXSAVE32) {
			fxrstor(&ifps->fx_save_state);
		}
		else if (ifps->fp_save_layout == FXSAVE64) {
			fxrstor64(&ifps->fx_save_state);
		}
	}
	/* State now lives in the registers; the memory copy is stale. */
	ifps->fp_valid = FALSE;
}
/* Intentionally a no-op on this architecture. */
void
fpflush(__unused thread_t thr_act)
{
}
/*
 * Handle an SSE/SIMD floating-point exception for the current thread:
 * flush the register state to the save area, then raise EXC_ARITHMETIC
 * with the saved status word.  Panics if taken at interrupt level or
 * from a kernel thread.
 *
 * NOTE(review): as in fpexterrflt(), ifps is assumed non-NULL here --
 * an SSE exception implies prior FPU use; confirm.
 */
void
fpSSEexterrflt(void)
{
	thread_t thr_act = current_thread();
	struct x86_fpsave_state *ifps = thr_act->machine.pcb->ifps;
	boolean_t intr;

	intr = ml_set_interrupts_enabled(FALSE);

	if (get_interrupt_level())
		panic("SSE exception at interrupt context\n");
	if (current_task() == kernel_task)
		panic("SSE exception in kernel thread context\n");

	/* Save the FPU state so the status word below is current. */
	fp_save(thr_act);

	(void)ml_set_interrupts_enabled(intr);

	assert(ifps->fp_save_layout == FXSAVE32 || ifps->fp_save_layout == FXSAVE64);
	i386_exception(EXC_ARITHMETIC,
		       EXC_I386_SSEEXTERR,
		       ifps->fx_save_state.fx_status);
}
/*
 * Mark the current thread's saved FPU state as valid or invalid.
 * Marking it valid also releases FPU ownership (clear_fpu), since the
 * save-area copy is then authoritative.  No-op if the thread has no
 * FPU state.
 */
void
fp_setvalid(boolean_t value) {
	struct x86_fpsave_state *ifps = current_thread()->machine.pcb->ifps;

	if (ifps == NULL)
		return;

	ifps->fp_valid = value;
	if (value == TRUE)
		clear_fpu();
}