/*
 *  Written by Ian Ollmann.
 *  Copyright © 2005 Apple Computer Inc.
 */

#include <machine/asm.h>
#include "abi.h"

// Syntax: AT&T (GAS), passed through the C preprocessor.
// ENTRY, STACKP, FRAME_SIZE, FIRST_ARG_OFFSET, SUBP and ADDP are not defined
// in this file -- presumably they come from the headers above (TODO confirm).
//
// Every routine here is a leaf function that uses only %eax/%ecx/%edx (or
// their 64-bit forms), %xmm0/%xmm1 and the x87 stack as scratch.

//-----------------------------------------------------------------------------
// long double rintl( long double x )
//
// Rounds x to an integral value in the current rounding mode by adding and
// then subtracting copysign( 0x1.0p63, x ), which pushes the fraction bits
// out of the 64-bit significand.
//
// In:    x on the stack at FRAME_SIZE(STACKP)
// Out:   result in %st(0)
// Clobb: %eax, %ecx, %edx, flags; overwrites the argument's stack slot
//
// Note: I tried doing rintl with fistpll, but it fails for 0x1.0p63-1ulp.
//-----------------------------------------------------------------------------
ENTRY( rintl )
    movswl  8+FRAME_SIZE(STACKP), %eax      // sign + exponent of x, sign extended
    movl    %eax, %edx                      // sign + exponent of x
    andl    $0x7fff, %eax                   // exponent of x
    andl    $0x80000000, %edx               // signof( x )
    fldt    FRAME_SIZE( STACKP )            // { x }
    subl    $16383, %eax                    // unbiased exponent
    movl    %edx, %ecx                      // signof( x )
    orl     $0x5f000000, %edx               // copysignf( 0x1.0p63f, x )
    cmpl    $63, %eax                       // unsigned: exponent outside [0, 63) ?
    jae     1f                              //      goto 1

    // 1.0L <= |x| < 0x1.0p63L
    movl    %edx, FRAME_SIZE( STACKP )
    flds    FRAME_SIZE(STACKP)              // { copysignl( 0x1.0p63L, x ), x }
    fadd    %st(0), %st(1)                  // x + copysignl( 0x1.0p63, x )
    fsubrp                                  // x + copysignl( 0x1.0p63, x ) - copysignl( 0x1.0p63, x )
    ret

1:  // flags are still those of the cmpl above; now test the exponent as signed
    jge     2f                              // if( |x| >= 0x1.0p63L || isnan(x) ) return x

    // |x| < 1.0L
    movl    %edx, FRAME_SIZE( STACKP )
    orl     $0x3f800000, %ecx               // copysignf( 1.0f, x )
    flds    FRAME_SIZE(STACKP)              // { copysignl( 0x1.0p63L, x ), x }
    movl    %ecx, 4+FRAME_SIZE(STACKP)
    fadd    %st(0), %st(1)                  // x + copysignl( 0x1.0p63, x )
    fsubrp                                  // x + copysignl( 0x1.0p63, x ) - copysignl( 0x1.0p63, x )
    fabs                                    // strip sign (the result may be a zero of either sign)
    fmuls   4+FRAME_SIZE(STACKP)            // restore sign to signof x
2:
    ret

//-----------------------------------------------------------------------------
// float rintf( float x )
//
// Rounds x to an integral value in the current rounding mode by a round trip
// through a 32-bit integer. Zeros, NaNs, infinities and |x| >= 0x1.0p23f are
// returned unchanged. The sign of x is copied onto the result so that inputs
// which round to zero keep their sign (e.g. rintf( -0.25f ) is -0.0f); the
// integer round trip on its own always yields +0.0f for those.
//
// In:    i386: x on the stack at FRAME_SIZE(STACKP);  otherwise: x in %xmm0
// Out:   i386: result in %st(0);                      otherwise: result in %xmm0
// Clobb: %ecx, %edx, %xmm0, %xmm1, flags; i386 also overwrites the argument slot
//
// NOTE(review): cvtps2dq converts all four lanes of %xmm0. On the non-i386
// path the upper lanes hold whatever the caller left there, which may set
// floating-point status flags unrelated to x -- confirm whether that matters.
//-----------------------------------------------------------------------------
ENTRY( rintf )
#if defined( __i386__ )
    movl    FRAME_SIZE( STACKP ), %ecx
    movss   FRAME_SIZE( STACKP ), %xmm0
#else
    movd    %xmm0, %ecx
#endif
    movl    %ecx, %edx
    andl    $0x80000000, %edx               // signof( x )
    andl    $0x7fffffff, %ecx               // |x|
    subl    $1, %ecx                        // subtract 1. This wraps |+-0| around to 0xffffffff
    cmpl    $0x4afffffe, %ecx               // values >= 0x4b000000 - 1 are either integers, NaN or Inf
    ja      1f                              // unsigned compare adds 0 to the list

    cvtps2dq %xmm0, %xmm0                   // round to int32 in the current rounding mode
    cvtdq2ps %xmm0, %xmm0                   // back to float; a zero result is always +0.0f here
    movd    %edx, %xmm1                     // signof( x )
    orps    %xmm1, %xmm0                    // copysignf( result, x ): no-op unless the result is zero
#if defined( __i386__ )
    movss   %xmm0, FRAME_SIZE( STACKP )
#endif

1:
#if defined( __i386__ )
    flds    FRAME_SIZE( STACKP )
#endif
    ret

//-----------------------------------------------------------------------------
// double rint( double x )
//
// i386:   x is loaded from the stack and rounded with frndint; result in %st(0).
// others: x in %xmm0, result in %xmm0. Uses x + c - c with
//         c = copysign( 0x1.0p52, x ), then forces the sign of the result to
//         the sign of x. Without that last step the subtraction returns +0.0
//         for x = -0.0 and for -0.5 <= x < 0 in round-to-nearest, and -0.0
//         for small positive x when rounding toward negative infinity.
//         Clobbers %rax, %rcx, %rdx, %xmm1, flags.
//-----------------------------------------------------------------------------
#if defined( __i386__ )

ENTRY(rint)
    fldl    FIRST_ARG_OFFSET(STACKP)
    frndint
    ret

#else

ENTRY( rint )
    movd    %xmm0, %rax                     // bits of x
    movl    $0x43300000, %edx
    movl    $0x80000000, %ecx
    shlq    $32, %rdx                       // 0x1.0p52
    shlq    $32, %rcx                       // -0.0
    andq    %rax, %rcx                      // signof( x )
    xorq    %rcx, %rax                      // |x|
    cmpq    %rdx, %rax                      // if( |x| > 0x1.0p52 || isnan(x) )
    ja      1f                              //      return x

    orq     %rcx, %rdx                      // copysign( 0x1.0p52, x )
    movd    %rdx, %xmm1                     // copysign( 0x1.0p52, x )
    addsd   %xmm1, %xmm0                    // x + copysign( 0x1.0p52, x )
    subsd   %xmm1, %xmm0                    // x + copysign( 0x1.0p52, x ) - copysign( 0x1.0p52, x )

    // The magnitude is now correct, but a zero result can carry the wrong sign.
    movd    %xmm0, %rax                     // bits of the rounded result
    btrq    $63, %rax                       // |result|
    orq     %rcx, %rax                      // copysign( result, x )
    movd    %rax, %xmm0
1:
    ret

#endif

// i386 versions of these functions are in xmm_floor.c
// NOTE(review): non-LP64 versions are nevertheless defined in the #else branch
// below -- confirm which set is actually built for i386.
// On x86_64 we can take advantage of the REX form of cvtsd2si to produce 64-bit values
#if defined( __LP64__ )

//-----------------------------------------------------------------------------
// long lrint( double x ) / long long llrint( double x )      (LP64)
//
// In: x in %xmm0.  Out: %rax.  Clobb: %rdx, %xmm1.
// Converts in the current rounding mode. When x >= 0x1.0p63 the conversion's
// 0x8000000000000000 is flipped to 0x7fffffffffffffff.
//-----------------------------------------------------------------------------
ENTRY( lrint )
ENTRY( llrint )
    movl    $0x43e00000, %eax               // Exponent for 0x1.0p63
    movd    %eax, %xmm1                     // copy to low 32-bits of xmm1
    psllq   $32, %xmm1                      // move it to the high 32-bits of the low double in xmm1, to make 0x1.0p63
    cmplesd %xmm0, %xmm1                    // compare 0x1.0p63 <= x. Since there are no double precision values
                                            // between LONG_MAX and 0x1.0p63 we don't need to worry about them
    cvtsd2siq %xmm0, %rax                   // convert x to long
    movd    %xmm1, %rdx                     // copy compare result (all 64-bits) to %rdx
    xorq    %rdx, %rax                      // flip overflow values to 0x7fffffffffffffff
    ret

//-----------------------------------------------------------------------------
// long lrintf( float x ) / long long llrintf( float x )      (LP64)
//
// In: x in %xmm0.  Out: %rax.  Clobb: %rdx, %xmm1.
// Same scheme as lrint above. The single precision compare only produces a
// 32-bit mask, so it is sign extended to 64 bits before use.
//-----------------------------------------------------------------------------
ENTRY( lrintf )
ENTRY( llrintf )
    movl    $0x5f000000, %eax               // load 0x1.0p63f
    movd    %eax, %xmm1                     // copy to xmm
    cmpless %xmm0, %xmm1                    // compare 0x1.0p63f <= x
    cvtss2siq %xmm0, %rdx                   // convert x to long
    movd    %xmm1, %rax                     // copy 64 bits holding the comparison result to %rax
    cdqe                                    // sign extend the 32-bit mask to 64 bits
    xorq    %rdx, %rax                      // flip overflow results to 0x7fffffffffffffff
    ret

#else

//-----------------------------------------------------------------------------
// long lrintf( float x )                                      (non-LP64)
//
// In: x on the stack at FIRST_ARG_OFFSET(STACKP).  Out: %eax.
// Clobb: %edx, %xmm0, %xmm1.
//-----------------------------------------------------------------------------
ENTRY( lrintf )
    movl    $0x4f000000, %eax               // load 0x1.0p31f
    movss   (FIRST_ARG_OFFSET)( STACKP ), %xmm0     // load x
    movd    %eax, %xmm1                     // copy 0x1.0p31f to xmm1
    cmpless %xmm0, %xmm1                    // compare 0x1.0p31f <= x. There are no single precision values
                                            // between INT_MAX and 0x1.0p31f, so no need to worry here.
    cvtss2si %xmm0, %eax                    // convert to int
    movd    %xmm1, %edx                     // move the compare result to edx
    xorl    %edx, %eax                      // saturate overflow results to 0x7fffffff
    ret

//-----------------------------------------------------------------------------
// long lrint( double x )                                      (non-LP64)
//
// In: x on the stack at FIRST_ARG_OFFSET(STACKP).  Out: %eax.
// Clobb: %ecx, %edx, %xmm0, %xmm1, flags.
// Here the overflow test is made on the converted result (0x80000000) rather
// than on x, and the flip to 0x7fffffff is applied only when 0.0 < x.
//-----------------------------------------------------------------------------
ENTRY( lrint )
    movsd   (FIRST_ARG_OFFSET)( STACKP ), %xmm0     // load x
    xorpd   %xmm1, %xmm1                    // load 0.0
    cmpltsd %xmm0, %xmm1                    // test 0.0 < x
    cvtsd2si %xmm0, %eax                    // convert x to int
    movd    %xmm1, %edx                     // copy the compare result to %edx
    xorl    %ecx, %ecx                      // set %ecx to 0
    cmp     $0x80000000, %eax               // check the result to see if it is 0x80000000 -- the overflow result
    cmovne  %ecx, %edx                      // if the result is not 0x80000000, overwrite the earlier compare result with 0
    xorl    %edx, %eax                      // saturate overflow results to 0x7fffffff (was 0x80000000)
    ret

//-----------------------------------------------------------------------------
// long long llrintf( float x )                                (non-LP64)
//
// In: x on the stack.  Out: %edx:%eax.  Clobb: flags, 12 bytes of stack scratch.
// Converts with fistpll, then flips the result to 0x7fffffffffffffff when
// the fucomi compare reports x >= 0x1.0p63.
//-----------------------------------------------------------------------------
ENTRY( llrintf )
    SUBP    $12, STACKP                     // 8 bytes for the result + 4 for the constant
    movl    $0x5f000000, 8(STACKP)          // 0x1.0p63f
    xor     %edx, %edx
    flds    8(STACKP)                       // { 0x1.0p63 }
    flds    (FIRST_ARG_OFFSET+12)( STACKP ) // { f, 0x1.0p63 }
    fucomi  %ST(1), %ST                     // { f, 0x1.0p63 }   f >= 0x1.0p63
    fistpll (STACKP)                        // { 0x1.0p63 }
    fstp    %ST(0)                          // { }
    setnb   %dl                             // copy f >= 0x1.0p63 to the d register
    negl    %edx                            // convert [0,1] to [0,-1]
    movl    (STACKP), %eax                  // load low 32-bits of the result
    xorl    %edx, %eax                      // saturate to 0xffffffff if overflow
    xorl    4(STACKP), %edx                 // load the high 32-bits of the result and saturate to 0x7fffffff if overflow
    ADDP    $12, STACKP
    ret

//-----------------------------------------------------------------------------
// long long llrint( double x )                                (non-LP64)
//
// Identical to llrintf above except that x is loaded as a double.
//-----------------------------------------------------------------------------
ENTRY( llrint )
    SUBP    $12, STACKP                     // 8 bytes for the result + 4 for the constant
    movl    $0x5f000000, 8(STACKP)          // 0x1.0p63f
    xor     %edx, %edx
    flds    8(STACKP)                       // { 0x1.0p63 }
    fldl    (FIRST_ARG_OFFSET+12)( STACKP ) // { f, 0x1.0p63 }
    fucomi  %ST(1), %ST                     // { f, 0x1.0p63 }   f >= 0x1.0p63
    fistpll (STACKP)                        // { 0x1.0p63 }
    fstp    %ST(0)                          // { }
    setnb   %dl                             // copy f >= 0x1.0p63 to the d register
    negl    %edx                            // convert [0,1] to [0,-1]
    movl    (STACKP), %eax                  // load low 32-bits of the result
    xorl    %edx, %eax                      // saturate to 0xffffffff if overflow
    xorl    4(STACKP), %edx                 // load the high 32-bits of the result and saturate to 0x7fffffff if overflow
    ADDP    $12, STACKP
    ret

#endif