# lroundl.s   [plain text]

```
/*
*  lroundl.s
*
*      by Ian Ollmann
*
*
*/

#include "abi.h"
#include <machine/asm.h>

.align 2
.literal4
two63:          .long           0x5f000000
mtwo63:         .long           0xdf000000
one:            .long           1
inf:            .long           0x7f800000

.align 3
.literal8
cutoff32:        .double     2147483647.5            // 2**31-0.5
mcutoff32:       .double    -2147483648.5            // 2**31-0.5

.align 4
.literal16

.text
#if defined( __x86_64__ )

ENTRY( lroundl )
ENTRY( llroundl )
movswl  8+FRAME_SIZE( STACKP ), %edx
andl    \$0x7fff,                %edx        // exponent of x
movq    FRAME_SIZE( STACKP ),   %rax
subl    \$0x3ffe,                %edx        // push exponents less than -1 negative
fldt    FRAME_SIZE( STACKP )                // { x }
cmpl    \$(63+1),                %edx        // if( |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x) )
jae     1f                                  //      goto 1

// 0.5 <= |x| < 0x1.0p63
fldt    cutoff( %rip )                      // { 0x1.0p63 - 0.5, x }
fucomip %st(1),         %st(0)              // { x }
je      3f

//Shift the significand right so that units bit is at units + 1 position
movl    \$63,                    %ecx
subl    %edx,                   %ecx        // 63 - (exponent+1)
shrq    %cl,                    %rax        // shift so that the units bit is at the +1 position
movq    %rax,                   %rdx        // set aside a copy
shrq    \$1,                     %rax        // finish the shift with shift right by 1 bit -- we need to do 64-bit shifts here at times and not possible with ISA
andq    \$1,                     %rdx        // isolate the leading fractional bit
addq    %rdx,                   %rax        // round the result up.

//fix sign
movswq  8+FRAME_SIZE( STACKP ), %rdx        // read the sign + exponent
sarq    \$16,                    %rdx        // remove exponent
xorq    %rdx,                   %rax        // flip the sign of the result
subq    %rdx,                   %rax        // correct for 2's complement

// set inexact as necessary
fabs                                        // { |x| }
fadds   two63(%rip)                         // { |x| + 0x1.0p63 }   set inexact as necessary
fstp    %st(0)                              // throw away numerical result.

ret

//  |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x)
1:  jge     2f

// |x| < 0.5
xorq    %rax,                   %rax
fistpl  FRAME_SIZE( STACKP )                // set inexact  as necessary
ret

//  |x| >= 0x1.0p63 || isnan(x)
2:  movswq  8+FRAME_SIZE( STACKP ), %rdx
flds    mtwo63( %rip )
fucomip %st(1), %st(0)
je      4f
fistpl  FRAME_SIZE( STACKP )                // set invalid
shrq    \$63,                    %rdx
subq    \$1,                     %rdx
movq    \$0x8000000000000000,    %rax
xorq    %rdx,                   %rax
ret

//  0x1.0p63 - 0.5, positive overflow
3:  fistpl  FRAME_SIZE( STACKP )                // set invalid
movq    \$0x7fffffffffffffff,    %rax
ret

//  -0x1.0p63 or nan
4:  jp      5f
fstp    %st(0)
movq    \$0x8000000000000000,    %rax
ret

//  nan
5:  fistpl  FRAME_SIZE( STACKP )                // set invalid
movq    \$0x8000000000000000,    %rax
ret

#else

ENTRY( lroundl )
movswl  8+FRAME_SIZE( STACKP ), %edx
andl    \$0x7fff,                %edx        // exponent of x
movl    4+FRAME_SIZE( STACKP ), %eax
subl    \$0x3ffe,                %edx        // push exponents less than -1 negative
fldt    FRAME_SIZE( STACKP )                // { x }
cmpl    \$(31+1),                %edx        // if( |x| >= 0x1.0p31 || |x| < 0.5 || isnan(x) )
jae     1f                                  //      goto 1

//
call    0f
0:  popl    %ecx
fldl    (cutoff32-0b)(%ecx)
fucomip %st(1),     %st(0)
jbe     3f

// set inexact
fabs
fstp    %st(0)

// round
movl    \$31,                    %ecx
subl    %edx,                   %ecx
shrl    %cl,                    %eax
movl    %eax,                   %edx
shrl    \$1,                     %eax
andl    \$1,                     %edx

// fix sign
movswl  8+FRAME_SIZE( STACKP ), %edx
sarl    \$16,                    %edx
xorl    %edx,                   %eax
subl    %edx,                   %eax
ret

1:  jge     2f

// |x| < 0.5
xorl    %eax,                   %eax
fistpl  FRAME_SIZE( STACKP )                // set inexact  as necessary
ret

2:  movswl  8+FRAME_SIZE( STACKP),  %edx
call    0f
0:  popl    %ecx
fldl    ( mcutoff32-0b)(%ecx)
fucomip %st(1),         %st(0)
jae     4f
fldl    ( cutoff32-0b )(%ecx)
fucomip %st(1),         %st(0)
jbe     3f

// non overflowing result
shrl    \$31,                    %edx
subl    \$1,                     %edx
movl    \$0x80000000,            %eax
xorl    %edx,                   %eax

//set inexact
fabs
fstp    %st(0)
ret

//  positive overflow
3:  jp      5f
fistps  FRAME_SIZE( STACKP )
movl    \$0x7fffffff,            %eax
ret

// negative overflow
4:  fistps  FRAME_SIZE( STACKP )
movl    \$0x80000000,            %eax
ret

// nan
5:  fistpl  FRAME_SIZE( STACKP )
movl    \$0x80000000,            %eax
ret

ENTRY( llroundl )
movswl  8+FRAME_SIZE( STACKP ), %edx
andl    \$0x7fff,                %edx        // exponent of x
movq    FRAME_SIZE( STACKP ),   %xmm0
subl    \$0x3ffe,                %edx        // push exponents less than -1 negative
fldt    FRAME_SIZE( STACKP )                // { x }
cmpl    \$(63+1),                %edx        // if( |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x) )
jae     1f                                  //      goto 1

call    0f
0:  popl    %ecx

// 0.5 <= |x| < 0x1.0p63
fldt    (cutoff-0b)( %ecx )                 // { 0x1.0p63 - 0.5, x }
fucomip %st(1),         %st(0)              // { x }
je      3f

//Shift the significand right so that units bit is at units + 1 position
movl    \$63,                    %eax
movd    (one-0b)(%ecx),         %xmm2       // 1
subl    %edx,                   %eax        // 63 - (exponent+1)
movd    %eax,                   %xmm1
psrlq   %xmm1,                  %xmm0       // shift so that the units bit is at the +1 position
movq    %xmm0,                  %xmm1       // set aside a copy
psrlq   \$1,                     %xmm0       // finish the shift with shift right by 1 bit -- we need to do 64-bit shifts here at times and not possible with ISA
pand    %xmm2,                  %xmm1       // isolate the leading fractional bit
paddq   %xmm1,                  %xmm0       // round the result up.

// set inexact as necessary
fabs                                        // { |x| }
fadds   (two63-0b)(%ecx)                         // { |x| + 0x1.0p63 }   set inexact as necessary
fstp    %st(0)                              // throw away numerical result.

//fix sign
movswl  8+FRAME_SIZE( STACKP ), %eax        // read the sign + exponent
shrl    \$31,                    %eax        // remove exponent
movq    (sign-0b)(%ecx, %eax,8), %xmm1
pxor    %xmm1,                  %xmm0
psubq   %xmm1,                  %xmm0
movd    %xmm0,                  %eax
psrlq   \$32,                    %xmm0
movd    %xmm0,                  %edx

ret

//  |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x)
1:  jge     2f

// |x| < 0.5
xorl    %eax,                   %eax
xorl    %edx,                   %edx
fistpl  FRAME_SIZE( STACKP )                // set inexact  as necessary
ret

//  |x| >= 0x1.0p63 || isnan(x)
2:  movswl  8+FRAME_SIZE( STACKP ), %eax
call    0f
0:  popl    %ecx
flds    (mtwo63-0b)( %ecx )
fucomip %st(1), %st(0)
je      4f
fistpl  FRAME_SIZE( STACKP )                // set invalid
shrl    \$31,                    %eax
subl    \$1,                     %eax
movl    \$0x80000000,            %edx
xorl    %eax,                   %edx
ret

//  0x1.0p63 - 0.5, positive overflow
3:  fistpl  FRAME_SIZE( STACKP )                // set invalid
movl    \$-1,                    %eax
movl    \$0x7fffffff,            %edx
ret

//  -0x1.0p63 or nan
4:  jp      5f
fstp    %st(0)
movl    \$0x80000000,            %edx
xorl    %eax,                   %eax
ret

//  nan
5:  fistpl  FRAME_SIZE( STACKP )                // set invalid
movl    \$0x80000000,            %edx
xorl    %eax,                   %eax
ret

#endif
```