#define SSE (1 << 25)
static int
has_sse (void)
{
#ifdef __x86_64__
return 1;
#else
unsigned int eax, ebx, ecx, edx;
asm volatile ("pushfl; pushfl; popl %0; movl %0,%1; xorl %2,%0;"
"pushl %0; popfl; pushfl; popl %0; popfl"
: "=&r" (eax), "=&r" (ebx)
: "i" (0x00200000));
if (((eax ^ ebx) & 0x00200000) == 0)
return 0;
asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
: "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
: "0" (0));
if (eax == 0)
return 0;
asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
: "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
: "0" (1));
if (edx & SSE)
return 1;
return 0;
#endif
}
void set_fpu (void)
{
unsigned short cw;
unsigned int cw_sse;
#define _FPU_MASK_IM 0x01
#define _FPU_MASK_DM 0x02
#define _FPU_MASK_ZM 0x04
#define _FPU_MASK_OM 0x08
#define _FPU_MASK_UM 0x10
#define _FPU_MASK_PM 0x20
asm volatile ("fnstcw %0" : "=m" (cw));
cw |= _FPU_MASK_IM | _FPU_MASK_DM | _FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM | _FPU_MASK_PM;
if (options.fpe & GFC_FPE_INVALID) cw &= ~_FPU_MASK_IM;
if (options.fpe & GFC_FPE_DENORMAL) cw &= ~_FPU_MASK_DM;
if (options.fpe & GFC_FPE_ZERO) cw &= ~_FPU_MASK_ZM;
if (options.fpe & GFC_FPE_OVERFLOW) cw &= ~_FPU_MASK_OM;
if (options.fpe & GFC_FPE_UNDERFLOW) cw &= ~_FPU_MASK_UM;
if (options.fpe & GFC_FPE_PRECISION) cw &= ~_FPU_MASK_PM;
asm volatile ("fldcw %0" : : "m" (cw));
if (has_sse())
{
asm volatile ("stmxcsr %0" : "=m" (cw_sse));
cw_sse &= 0xFFFF0000;
cw_sse |= (_FPU_MASK_IM | _FPU_MASK_DM | _FPU_MASK_ZM | _FPU_MASK_OM
| _FPU_MASK_UM | _FPU_MASK_PM ) << 7;
if (options.fpe & GFC_FPE_INVALID) cw_sse &= ~(_FPU_MASK_IM << 7);
if (options.fpe & GFC_FPE_DENORMAL) cw_sse &= ~(_FPU_MASK_DM << 7);
if (options.fpe & GFC_FPE_ZERO) cw_sse &= ~(_FPU_MASK_ZM << 7);
if (options.fpe & GFC_FPE_OVERFLOW) cw_sse &= ~(_FPU_MASK_OM << 7);
if (options.fpe & GFC_FPE_UNDERFLOW) cw_sse &= ~(_FPU_MASK_UM << 7);
if (options.fpe & GFC_FPE_PRECISION) cw_sse &= ~(_FPU_MASK_PM << 7);
asm volatile ("ldmxcsr %0" : : "m" (cw_sse));
}
}