/*
 * Symbol-name mapping.
 * The i386 and 680x0 code in this file refers to the C-level variables and
 * entry points with a leading underscore.  When NO_UNDERLINE is defined,
 * each of those names is rewritten to its plain spelling (presumably for
 * toolchains that do not prefix C symbols with an underscore -- confirm
 * against the build configuration).
 */
#ifdef NO_UNDERLINE
# define _prev prev
# define _window window
# define _match_start match_start
# define _prev_length prev_length
# define _good_match good_match
# define _nice_match nice_match
# define _strstart strstart
# define _max_chain_length max_chain_length
# define _match_init match_init
# define _longest_match longest_match
#endif
/*
 * The code below uses window and prev directly as link-time addresses
 * (for example as an address displacement or an immediate operand), so it
 * cannot work when those buffers are allocated dynamically.  Stop the build
 * with a proper preprocessor diagnostic instead of relying on a line that
 * merely fails to assemble.
 */
#ifdef DYN_ALLOC
#  error DYN_ALLOC not yet supported in match.s
#endif
/* ======================== Intel 386 version (AT&T syntax) ================ */
#if defined(i386) || defined(_I386) || defined(__i386) || defined(__i386__)
.file "match.S"
/* Longest match length handled by the scan loop. */
#define MAX_MATCH 258
/* Word count handed to rep cmpsw: 128 words = 256 bytes, compared after the
 * first two bytes have already been checked (2 + 256 = MAX_MATCH). */
#define MAX_MATCH2 $128
#define MIN_MATCH 3
/* NOTE: WSIZE already carries the immediate prefix, so WSIZE-1 expands to
 * $32768-1 and MAX_DIST to $32768 - 258 - 3 - 1. */
#define WSIZE $32768
#define MAX_DIST WSIZE - MAX_MATCH - MIN_MATCH - 1
/* Exported entry points. */
.globl _match_init
.globl _longest_match
.text
/* _match_init: nothing to set up for this implementation; returns at once. */
_match_init:
ret
/*
 * _longest_match -- takes one 32-bit stack argument (cur_match) and returns
 * the best match length in %eax.
 *
 * Follows the _prev chain starting at cur_match, comparing the bytes at
 * _window+cur_match against the bytes at _window+_strstart.  Whenever a
 * longer match than the current best is found, its start is stored in
 * _match_start.  The walk stops when the chain reaches the limit, when the
 * chain-length budget is used up, or when a match of at least _nice_match
 * bytes has been found.
 *
 * Register roles:
 *   %esi  cur_match (window index); temporarily the match pointer while
 *         the string compare runs
 *   %edi  address of window[strstart + 2]  (scan + 2)
 *   %ebp  remaining chain length
 *   %ebx  best_len
 *   %edx  limit: chain entries at or below this index end the search
 *   %ax   scan[best_len-1 .. best_len]
 *   %cx   scan[0 .. 1]
 */
_longest_match:
/* Four registers are pushed below (16 bytes), so the first stack argument,
 * at 4(%esp) on entry, is found at 20(%esp) afterwards. */
#define cur_match 20(%esp)
push %ebp
push %edi
push %esi
push %ebx
/* esi = cur_match, ebp = chain_length = max_chain_length */
mov cur_match,%esi
mov _max_chain_length,%ebp
/* edx = limit = strstart - MAX_DIST, forced to 0 if the subtraction borrows */
mov _strstart,%edi
mov %edi,%edx
sub MAX_DIST,%edx
jae limit_ok
sub %edx,%edx
limit_ok:
/* edi = address of window[strstart + 2] */
add $2+_window,%edi
/* ebx = best_len = prev_length */
mov _prev_length,%ebx
/* ax = scan[best_len-1 .. best_len], cx = scan[0 .. 1] */
movw -3(%ebx,%edi),%ax
movw -2(%edi),%cx
/* If best_len >= good_match, follow only a quarter of the chain. */
cmp _good_match,%ebx
jb do_scan
shr $2,%ebp
jmp do_scan
.align 4
/* long_loop: entered after a full string compare, which overwrote eax and
 * ecx (and may have changed best_len), so both cached words are reloaded. */
long_loop:
movw -3(%ebx,%edi),%ax
movw -2(%edi),%cx
/* short_loop: advance to the next chain entry; ax and cx are still valid. */
short_loop:
/* Reduce cur_match to a window-sized index; this also leaves the upper
 * half of esi zero, so the 16-bit load below yields a clean 32-bit index. */
and WSIZE-1, %esi
/* cur_match = prev[cur_match]  (base + index = 2 * esi, 16-bit entries) */
movw _prev(%esi,%esi),%si
/* Stop if cur_match <= limit (unsigned). */
cmp %edx,%esi
jbe the_end
/* Stop when the chain-length budget reaches zero. */
dec %ebp
jz the_end
do_scan:
/* Quick reject 1: the two bytes at best_len-1 must match. */
cmpw _window-1(%ebx,%esi),%ax
jne short_loop
/* Quick reject 2: the first two bytes must match. */
cmpw _window(%esi),%cx
jne short_loop
/* esi = match + 2, eax = saved copy of scan + 2 */
lea _window+2(%esi),%esi
mov %edi,%eax
/* Compare up to MAX_MATCH2 words; stops after the first differing word. */
mov MAX_MATCH2,%ecx
rep; cmpsw
/* ZF still set here means every word compared equal. */
je maxmatch
mismatch:
/* cl = 0 if the first byte of the last compared word was equal, i.e. the
 * difference is in its second byte. */
movb -2(%edi),%cl
subb -2(%esi),%cl
/* edi = scan + 2 again, eax = bytes advanced during the compare */
xchg %edi,%eax
sub %edi,%eax
/* Undo the pointer arithmetic on esi to get cur_match back. */
sub %eax,%esi
sub $2+_window,%esi
/* Carry is set only when cl was 0; add it so that eax = match length.
 * SUB is used rather than DEC because DEC does not modify the carry flag. */
subb $1,%cl
adc $0,%eax
/* Not longer than the current best (signed compare): try next entry. */
cmp %ebx,%eax
jle long_loop
/* New best: record its start and length. */
mov %esi,_match_start
mov %eax,%ebx
/* Keep searching while best_len < nice_match (signed compare). */
cmp _nice_match,%eax
jl long_loop
the_end:
/* Return best_len and restore the saved registers in reverse push order. */
mov %ebx,%eax
pop %ebx
pop %esi
pop %edi
pop %ebp
ret
maxmatch:
/* All MAX_MATCH2 words matched.  cmpsb advances both pointers by one more
 * byte so that the length computation at mismatch yields the full length;
 * the flags it produces are not used. */
cmpsb
jmp mismatch
#else
/* ======================== 680x0 version ================================= */
/* Fold the various compiler-provided spellings into mc68000 ... */
#if defined(m68k)||defined(mc68k)||defined(__mc68000__)||defined(__MC68000__)
# ifndef mc68000
# define mc68000
# endif
#endif
/* ... and mc68020. */
#if defined(__mc68020__) || defined(__MC68020__) || defined(sysV68)
# ifndef mc68020
# define mc68020
# endif
#endif
#if defined(mc68020) || defined(mc68000)
/* UNALIGNED_OK selects the code paths that load 16-bit words from addresses
 * of arbitrary alignment; it is switched on automatically for mc68020 and
 * NeXT builds unless the user already defined it. */
#if (defined(mc68020) || defined(NeXT)) && !defined(UNALIGNED_OK)
# define UNALIGNED_OK
#endif
/*
 * Assembler-dialect abstraction.  The routine below is written once in
 * terms of these macros; each branch maps them onto one assembler syntax.
 */
#ifdef sysV68
/* sysV68 dialect: directives without a leading dot, & for immediates,
 * % register prefix, size given as a .b/.w/.l suffix, and operands written
 * in parentheses. */
# define GLOBAL(symbol) global symbol
# define TEXT text
# define FILE(filename) file filename
/* invert_maybe is used for the operands of compare instructions: this
 * dialect gets them in swapped order. */
# define invert_maybe(src,dst) dst,src
# define imm(data) &data
# define reg(register) %register
/* Map the mnemonics used below onto suffix-style spellings. */
# define addl add.l
# define addql addq.l
# define blos blo.b
# define bhis bhi.b
# define bras bra.b
# define clrl clr.l
# define cmpmb cmpm.b
# define cmpw cmp.w
# define cmpl cmp.l
# define lslw lsl.w
# define lsrl lsr.l
# define movel move.l
# define movew move.w
# define moveb move.b
# define moveml movem.l
# define subl sub.l
# define subw sub.w
# define subql subq.l
/* Addressing modes: displacement, displacement + long/word index,
 * predecrement, postincrement. */
# define IndBase(bd,An) (bd,An)
# define IndBaseNdxl(bd,An,Xn) (bd,An,Xn.l)
# define IndBaseNdxw(bd,An,Xn) (bd,An,Xn.w)
# define predec(An) -(An)
# define postinc(An) (An)+
#else
/* Other assemblers: dotted directives, bare register names and the
 * An@(...) addressing notation. */
# define GLOBAL(symbol) .globl symbol
# define TEXT .text
/* No file directive is emitted here; FILE() expands to .even instead. */
# define FILE(filename) .even
/* Compare operands are passed through in the order written. */
# define invert_maybe(src,dst) src,dst
/* Immediate prefix: a bare # for sun/mc68k, a backslash-escaped # for the
 * rest (presumably to get the # past the preprocessor -- confirm). */
# if defined(sun) || defined(mc68k)
# define imm(data) #data
# else
# define imm(data) \#data
# endif
# define reg(register) register
/* Short branch-if-lower is spelled as branch-if-carry-set here. */
# define blos bcss
/* sun/mc68k assemblers use the short move mnemonics. */
# if defined(sun) || defined(mc68k)
# define movel movl
# define movew movw
# define moveb movb
# endif
# define IndBase(bd,An) An@(bd)
# define IndBaseNdxl(bd,An,Xn) An@(bd,Xn:l)
# define IndBaseNdxw(bd,An,Xn) An@(bd,Xn:w)
# define predec(An) An@-
# define postinc(An) An@+
#endif
/*
 * Register assignment for _longest_match.
 */
/* Data registers */
/* d0: best match length so far; also the return value */
#define Best_Len reg(d0)
/* d1: current chain position (window index) */
#define Cur_Match reg(d1)
/* d2: down-counter for the byte compare loop */
#define Loop_Counter reg(d2)
/* d3: first two bytes of the string being matched */
#define Scan_Start reg(d3)
/* d4: the two bytes of the string at best_len-1 and best_len */
#define Scan_End reg(d4)
/* d5: chain positions at or below this value end the search */
#define Limit reg(d5)
/* d6: remaining chain length, used as the dbls counter */
#define Chain_Length reg(d6)
/* d7: scratch word assembled byte-wise; only used without UNALIGNED_OK */
#define Scan_Test reg(d7)
/* Address registers */
/* a0 / a1: running pointers of the byte compare loop */
#define Scan reg(a0)
#define Match reg(a1)
/* a2: address of the prev table */
#define Prev_Address reg(a2)
/* a3: window + MIN_MATCH + strstart */
#define Scan_Ini reg(a3)
/* a4: window + MIN_MATCH */
#define Match_Ini reg(a4)
#define Stack_Pointer reg(sp)
/* Algorithm constants. */
#define MAX_MATCH 258
#define MIN_MATCH 3
#define WSIZE 32768
#define MAX_DIST (WSIZE - MAX_MATCH - MIN_MATCH - 1)
/* Exported entry points and section selection. */
GLOBAL (_match_init)
GLOBAL (_longest_match)
TEXT
FILE ("match.S")
/* _match_init: nothing to set up for this implementation; returns at once. */
_match_init:
rts
/*
 * movem register masks for saving and restoring the registers used by
 * _longest_match.  The predecrement form (pushreg) and the postincrement
 * form (popreg) use mirrored bit orders, hence two constants per set.
 *   15928 = 0x3E38, 7292 = 0x1C7C : d2-d6 / a2-a4
 *   16184 = 0x3F38, 7420 = 0x1CFC : d2-d7 / a2-a4
 * d7 (Scan_Test) is only used by the code paths taken without
 * UNALIGNED_OK, so it is only saved in that configuration.
 */
#ifdef UNALIGNED_OK
# define pushreg 15928
# define popreg 7292
#else
# define pushreg 16184
# define popreg 7420
#endif
/*
 * _longest_match -- takes one long stack argument (cur_match) and returns
 * the best match length in d0 (Best_Len).
 *
 * Follows the _prev chain starting at cur_match, comparing window bytes at
 * cur_match against window bytes at _strstart.  Each time a longer match is
 * found its start is stored in _match_start.  The walk stops when the chain
 * reaches Limit, when Chain_Length is used up, or when Best_Len is no longer
 * below _nice_match.
 */
_longest_match:
/* Fetch the argument before the register save moves the stack pointer. */
movel IndBase(4,Stack_Pointer),Cur_Match
moveml imm(pushreg),predec(Stack_Pointer)
movel _max_chain_length,Chain_Length
movel _prev_length,Best_Len
movel imm(_prev),Prev_Address
/* Match_Ini = window + MIN_MATCH; Scan_Ini = Match_Ini + strstart */
movel imm(_window+MIN_MATCH),Match_Ini
movel _strstart,Limit
movel Match_Ini,Scan_Ini
addl Limit,Scan_Ini
/* Limit = strstart - MAX_DIST as a 16-bit operation; cleared to 0 unless
 * the result is strictly positive without borrow. */
subw imm(MAX_DIST),Limit
bhis L__limit_ok
clrl Limit
L__limit_ok:
/* If Best_Len >= good_match, follow only a quarter of the chain. */
cmpl invert_maybe(_good_match,Best_Len)
blos L__length_ok
lsrl imm(2),Chain_Length
L__length_ok:
/* dbls below counts down to -1, so start one lower. */
subql imm(1),Chain_Length
/* Scan_Start = scan[0..1], Scan_End = scan[best_len-1..best_len].
 * With UNALIGNED_OK these are single word loads; otherwise each word is
 * assembled from two byte loads. */
#ifdef UNALIGNED_OK
movew IndBase(-MIN_MATCH,Scan_Ini),Scan_Start
movew IndBaseNdxw(-MIN_MATCH-1,Scan_Ini,Best_Len),Scan_End
#else
moveb IndBase(-MIN_MATCH,Scan_Ini),Scan_Start
lslw imm(8),Scan_Start
moveb IndBase(-MIN_MATCH+1,Scan_Ini),Scan_Start
moveb IndBaseNdxw(-MIN_MATCH-1,Scan_Ini,Best_Len),Scan_End
lslw imm(8),Scan_End
moveb IndBaseNdxw(-MIN_MATCH,Scan_Ini,Best_Len),Scan_End
#endif
bras L__do_scan
/* L__long_loop: Best_Len has just changed, so Scan_End is reloaded. */
L__long_loop:
#ifdef UNALIGNED_OK
movew IndBaseNdxw(-MIN_MATCH-1,Scan_Ini,Best_Len),Scan_End
#else
moveb IndBaseNdxw(-MIN_MATCH-1,Scan_Ini,Best_Len),Scan_End
lslw imm(8),Scan_End
moveb IndBaseNdxw(-MIN_MATCH,Scan_Ini,Best_Len),Scan_End
#endif
/* L__short_loop: step to the next chain entry. */
L__short_loop:
/* The 16-bit shift doubles the index for the word-sized table and drops
 * the top bit, which wraps the index to the table size in the same step. */
lslw imm(1),Cur_Match
movew IndBaseNdxl(0,Prev_Address,Cur_Match),Cur_Match
/* dbls: if Cur_Match <= Limit (unsigned word compare) fall through and
 * return; otherwise decrement Chain_Length and continue at L__do_scan
 * unless the counter has run out. */
cmpw invert_maybe(Limit,Cur_Match)
dbls Chain_Length,L__do_scan
bras L__return
L__do_scan:
/* Match = window + MIN_MATCH + Cur_Match */
movel Match_Ini,Match
addl Cur_Match,Match
/* Quick rejects: the word at best_len-1 and then the first word of the
 * candidate must equal Scan_End and Scan_Start respectively. */
#ifdef UNALIGNED_OK
cmpw invert_maybe(IndBaseNdxw(-MIN_MATCH-1,Match,Best_Len),Scan_End)
bne L__short_loop
cmpw invert_maybe(IndBase(-MIN_MATCH,Match),Scan_Start)
bne L__short_loop
#else
moveb IndBaseNdxw(-MIN_MATCH-1,Match,Best_Len),Scan_Test
lslw imm(8),Scan_Test
moveb IndBaseNdxw(-MIN_MATCH,Match,Best_Len),Scan_Test
cmpw invert_maybe(Scan_Test,Scan_End)
bne L__short_loop
moveb IndBase(-MIN_MATCH,Match),Scan_Test
lslw imm(8),Scan_Test
moveb IndBase(-MIN_MATCH+1,Match),Scan_Test
cmpw invert_maybe(Scan_Test,Scan_Start)
bne L__short_loop
#endif
/* Byte-wise compare starting at offset MIN_MATCH, for at most
 * MAX_MATCH-MIN_MATCH+1 bytes.  Offset 2 is not compared here
 * (NOTE(review): presumably guaranteed equal by how chain entries are
 * produced -- confirm against the C implementation). */
movew imm((MAX_MATCH-MIN_MATCH+1)-1),Loop_Counter
movel Scan_Ini,Scan
L__scan_loop:
cmpmb postinc(Match),postinc(Scan)
dbne Loop_Counter,L__scan_loop
/* Scan = match length: bytes stepped over in the loop plus MIN_MATCH-1. */
subl Scan_Ini,Scan
addql imm(MIN_MATCH-1),Scan
/* Not longer than the current best: try the next chain entry. */
cmpl invert_maybe(Best_Len,Scan)
bls L__short_loop
/* New best: record length and start position. */
movel Scan,Best_Len
movel Cur_Match,_match_start
/* Keep searching while Best_Len < nice_match. */
cmpl invert_maybe(_nice_match,Best_Len)
blos L__long_loop
L__return:
/* Restore the saved registers; the result is already in d0. */
moveml postinc(Stack_Pointer),imm(popreg)
rts
#else
# if defined (__ia64__)
/* ======================== ia64 version ================================== */
/*
 * Byte-order dependent mnemonics used by the compare loop:
 *   first / second  the pair of shifts that realigns two adjacent 8-byte
 *                   words into one word of string data
 *   count           the czx1 form that finds the first zero byte in
 *                   memory order
 *
 * The test must be made against __BIG_ENDIAN.  The previous spelling had
 * four leading underscores, which names no macro; an unknown identifier in
 * an #if evaluates to 0, so the big-endian branch could never be selected.
 */
#include <endian.h>
#if __BYTE_ORDER == __BIG_ENDIAN
#define first shl
#define second shr.u
#define count czx1.l
#else
#define first shr.u
#define second shl
#define count czx1.r
#endif
// Register aliases for the IA-64 longest_match.
// Naming appears to follow: s_v* = value, s_a* = address, s_tm* = temporary.
//
// r32-r36 lie in the stacked register frame allocated by the alloc in
// longest_match (r32 is also in0 on entry) and are used outside the
// pipelined compare loop.
#define s_vmatch0 r32
#define s_vmatch1 r33
#define s_vmatbst r34
#define s_vmatbst1 r35
#define s_amatblen r36
// r56-r63: scratch temporaries at the top of the stacked frame.
#define s_tm1 r56
#define s_tm2 r57
#define s_tm3 r58
#define s_tm4 r59
#define s_tm5 r60
#define s_tm6 r61
#define s_tm7 r62
#define s_tm8 r63
// Static registers holding the search state.
#define s_vlen r31
#define s_vstrstart r30
#define s_vchainlen r29
#define s_awinbest r28
#define s_vcurmatch r27
#define s_vlimit r26
#define s_vscanend r25
#define s_vscanend1 r24
#define s_anicematch r23
#define s_vscan0 r22
#define s_vscan1 r21
#define s_aprev r20
#define s_awindow r19
#define s_amatchstart r18
#define s_ascan r17
#define s_amatch r16
#define s_wmask r15
#define s_ascanend r14
#define s_vspec_cmatch r11 // next iteration
// Saved copies of ar.lc and pr, restored at .terminate.
#define s_lcsave r10
#define s_prsave r9
#define s_vbestlen r8 // return register
#define s_vscan3 r3
#define s_vmatch3 r2
// Predicate register aliases.
#define p_no p2
#define p_yes p3
#define p_shf p4 //
#define p_bn2 p5 // Use in loop (indicating bestlen != 2)
#define p_nbs p9 // not new best_len
#define p_nnc p10 // not nice_length
#define p_ll p11
#define p_end p12
// Algorithm constants.  NOTE(review): MIN_MATCH is 4 here but 3 in the other
// two implementations in this file -- confirm this is intended.
#define MAX_MATCH 258
#define MIN_MATCH 4
#define WSIZE 32768
#define MAX_DIST WSIZE - MAX_MATCH - MIN_MATCH - 1
// Arguments of the alloc instruction: inputs, locals, outputs, rotating.
#define R_INPUT 1
#define R_LOCAL 31
#define R_OUTPUT 0
#define R_ROTATING 24
// Pipeline depths used to size the rotating register sets and to pick the
// stage predicates in the compare loop (presumably load latency and shift
// latency, in loop iterations -- confirm).
#define MLAT 3
#define SHLAT 2
// Mnemonic shorthands; mova and movi0 both expand to a plain mov.
#define mova mov
#define movi0 mov
#define cgtu cmp.gt.unc
#define cgeu cmp.ge.unc
#define cneu cmp.ne.unc
// longest_match (IA-64)
//   in0 = cur_match; the best match length is returned in r8 (s_vbestlen).
//
// Reads strstart, prev_length, max_chain_length and good_match relative to
// gp, and obtains the addresses of window, prev, nice_match and match_start
// through @ltoff linkage-table entries.  Follows the prev chain starting at
// cur_match; whenever a longer match is found, its position is stored to
// match_start.  The search ends when the chain entry is <= the limit, when
// the chain-length counter reaches zero, or when the new best length is not
// below nice_match.
//
// Layout note: every statement is on its own line.  Instruction order,
// bundle templates and stop bits (;;) are significant and must not be
// rearranged.
.global longest_match
.proc longest_match
.align 32
longest_match:
.prologue
{.mmi
    alloc r2=ar.pfs,R_INPUT,R_LOCAL,R_OUTPUT,R_ROTATING
.rotr scan[MLAT+2], match[MLAT+2], shscan0[SHLAT+1], \
shscan1[SHLAT+1], shmatch0[SHLAT+1], shmatch1[SHLAT+1]
.rotp lc[MLAT+SHLAT+2]
    // Take cur_match from the input register; start forming the addresses
    // of the globals.
    mova s_vspec_cmatch=in0
    add s_tm1=@gprel(strstart),gp
}{.mmi
    add s_tm3=@gprel(prev_length),gp
    add s_tm5=@ltoff(window),gp
    add s_tm6=@ltoff(prev),gp
    ;;
}{.mmb
    // strstart, and best_len = prev_length
    ld4 s_vstrstart=[s_tm1]
    ld4 s_vbestlen=[s_tm3]
    brp.loop.imp .cmploop,.cmploop+48
}{.mli
    add s_tm2=@gprel(max_chain_length),gp
    movl s_wmask=WSIZE-1
    ;;
}{.mmi
    // Addresses of prev and window from their linkage-table entries.
    ld8 s_aprev=[s_tm6]
    ld8 s_awindow=[s_tm5]
.save pr, s_prsave
    movi0 s_prsave=pr
}{.mmi
    add s_tm4=@gprel(good_match),gp
    add s_tm7=@ltoff(nice_match),gp
    add s_tm8=@ltoff(match_start),gp
    ;;
}{.mmi
    ld8 s_anicematch=[s_tm7]
    ld8 s_amatchstart=[s_tm8]
.save ar.lc, s_lcsave
    movi0 s_lcsave=ar.lc
}{.mmi
.body
    // s_tm1 = (WSIZE-1) - (MAX_MATCH + MIN_MATCH); p_ll starts out true.
    add s_tm1=-(MAX_MATCH + MIN_MATCH),s_wmask
    cmp.eq p_ll,p0=r0,r0
    mova s_vcurmatch=s_vspec_cmatch
    ;;
}{.mmi
    ld4 s_vchainlen=[s_tm2]
    ld4 s_tm4=[s_tm4]
    // scan = window + strstart
    add s_ascan=s_awindow,s_vstrstart
}{.mmi
    // limit = strstart - s_tm1; match = window + cur_match
    sub s_vlimit=s_vstrstart, s_tm1
    add s_amatch=s_awindow,s_vspec_cmatch
    and s_vspec_cmatch =s_vspec_cmatch,s_wmask
    ;;
}{.mmi
    add s_amatblen=s_amatch,s_vbestlen
    cneu p_bn2,p0=2,s_vbestlen
    add s_ascanend=s_ascan,s_vbestlen
}{.mmi
    // Bytes 0 of scan and match; p_no is set when limit is not above zero.
    ld1 s_vscan0=[s_ascan],1
    ld1 s_vmatch0=[s_amatch],1
    cgtu p0,p_no=s_vlimit,r0
    ;;
}{.mmi
    // Bytes 1; both pointers then step to offset 3.
    ld1.nt1 s_vscan1=[s_ascan],2
    ld1.nt1 s_vmatch1=[s_amatch],2
    add s_awinbest=s_awindow,s_vbestlen
    ;;
}{.mmi
    // Bytes at offset best_len of scan and match.
    ld1.nt1 s_vscanend=[s_ascanend],-1
    ld1.nt1 s_vmatbst=[s_amatblen],-1
    // Clamp the limit to zero.
    (p_no) mova s_vlimit=r0
    ;;
}{.mmi
    // Bytes at offset best_len-1, only when best_len != 2.
    (p_bn2) ld1.nt1 s_vscanend1=[s_ascanend],1
    (p_bn2) ld1.nt1 s_vmatbst1=[s_amatblen]
    // Address of prev[cur_match & wmask] (2-byte entries).
    shladd s_vspec_cmatch =s_vspec_cmatch,1,s_aprev
}{.mmi
    // p_shf: best_len >= good_match
    cgeu p_shf,p0=s_vbestlen,s_tm4
    ;;
}{.mmi
    ld1.nt1 s_vscan3=[s_ascan]
    // Load the next chain entry ahead of time.
    ld2.nt1 s_vspec_cmatch=[s_vspec_cmatch]
    mova s_vlen=3
}{.mmi
    // With a good match already, follow only a quarter of the chain.
    (p_shf) shr.u s_vchainlen=s_vchainlen,2
    ;;
}{.mmi
    ld1.nt1 s_vmatch3=[s_amatch]
    // p_ll stays true only if every probed byte pair is equal.
    cmp.eq.and p_ll,p0=s_vmatch0,s_vscan0
    cmp.eq.and p_ll,p0=s_vmatbst,s_vscanend
}{.mib
    cmp.eq.and p_ll,p0=s_vmatch1,s_vscan1
    (p_bn2) cmp.eq.and p_ll,p0=s_vmatbst1,s_vscanend1
    (p_ll) br.cond.dpnt.many .test_more
    ;;
}
// Advance to the chain entry that was loaded ahead of time.
.next_iter:
{.mmi
    add s_amatch=s_awindow,s_vspec_cmatch
    mov s_vcurmatch=s_vspec_cmatch
    add s_vchainlen=-1,s_vchainlen
}{.mib
    // Done when the new cur_match <= limit.
    cmp.le.unc p_end,p0=s_vspec_cmatch,s_vlimit
    and s_vspec_cmatch=s_vspec_cmatch,s_wmask
    (p_end) br.cond.dptk.many .terminate
    ;;
}{.mmi
    ld1 s_vmatch0=[s_amatch],1
    shladd s_vspec_cmatch=s_vspec_cmatch,1,s_aprev
    // Done when the chain-length counter has reached zero.
    cmp.eq.unc p_end,p0=s_vchainlen,r0
} {.mib
    nop.m 0
    // Address of match[best_len]
    add s_amatblen=s_awinbest,s_vcurmatch
    (p_end) br.cond.dptk.many .terminate
    ;;
}{.mmi
    // Load the following chain entry ahead of time.
    ld2.nt1 s_vspec_cmatch=[s_vspec_cmatch]
    ;;
}{.mmi
    ld1.nt1 s_vmatbst=[s_amatblen],-1
    // p_ll starts out false and collects any mismatch below.
    cmp.ne.unc p_ll,p0=r0,r0
    ;;
}{.mmi
    ld1.nt1 s_vmatch1=[s_amatch],2
    ;;
    (p_bn2) ld1.nt1 s_vmatbst1=[s_amatblen]
}{.mib
    cmp.ne.or p_ll,p0=s_vmatch0,s_vscan0
    cmp.ne.or p_ll,p0=s_vmatbst,s_vscanend
    (p_ll) br.cond.dptk.many .next_iter
    ;;
}{.mmi
    ld1.nt1 s_vmatch3=[s_amatch]
    mova s_vlen=3
    nop.i 0
}{.mib
    cmp.ne.or p_ll,p0=s_vmatch1,s_vscan1
    (p_bn2) cmp.ne.or p_ll,p0=s_vmatbst1,s_vscanend1
    (p_ll) br.cond.dptk.many .next_iter
    ;;
}
// The probed bytes agree; set up the 8-bytes-per-step compare.
.test_more:
{.mmi
    // Byte offsets of scan and match within their aligned 8-byte words.
    and s_tm3=7,s_ascan
    and s_tm4=7,s_amatch
    movi0 ar.ec=MLAT+SHLAT+2
}{.mib
    // A difference at offset 3 means the length is exactly 3.
    cmp.ne.unc p_no,p0=s_vscan3,s_vmatch3
    (p_no) br.cond.dptk.many .only3
    ;;
}{.mmi
    // Aligned base addresses, and the byte offsets converted to bit counts.
    and s_tm1=-8,s_ascan
    shladd s_tm3=s_tm3,3,r0
    movi0 ar.lc=31
}{.mib
    and s_tm2=-8,s_amatch
    shladd s_tm4=s_tm4,3,r0
    nop.b 0
    ;;
}{.mmi
    // Prime the pipeline with the first aligned word of each string and
    // compute the complementary shift counts.
    ld8.nt1 scan[1]=[s_tm1],8
    sub s_tm5=64,s_tm3
    movi0 pr.rot=1<<16
}{.mmi
    ld8.nt1 match[1]=[s_tm2],8
    sub s_tm6=64,s_tm4
    add s_vlen=-8,s_vlen
    ;;
}
.align 32
// Software-pipelined compare loop, staged by the rotating lc[] predicates:
//   stage 0              load the next aligned 8-byte word of scan and match
//   stage MLAT           shift adjacent words by the misalignment
//   stage MLAT+SHLAT     OR the two halves into s_tm7 (scan) / s_tm8 (match)
//   stage MLAT+SHLAT+1   add 8 to s_vlen and compare s_tm7 with s_tm8
.cmploop:
{.mmi
    (lc[0]) ld8 scan[0]=[s_tm1],8
    (lc[MLAT+SHLAT+1]) add s_vlen=8,s_vlen
    (lc[MLAT]) first shscan0[0]=scan[MLAT+1],s_tm3
}{.mib
    (lc[MLAT+SHLAT+1]) cmp.ne.unc p_no,p0=s_tm7,s_tm8
    (lc[MLAT]) first shmatch0[0]=match[MLAT+1],s_tm4
    (p_no) br.cond.dpnt.many .mismatch
    ;;
}{.mii
    (lc[0]) ld8 match[0]=[s_tm2],8
    (lc[MLAT]) second shscan1[0]=scan[MLAT],s_tm5
    (lc[MLAT]) second shmatch1[0]=match[MLAT],s_tm6
}{.mmb
    (lc[MLAT+SHLAT]) or s_tm7=shscan0[SHLAT],shscan1[SHLAT]
    (lc[MLAT+SHLAT]) or s_tm8=shmatch0[SHLAT],shmatch1[SHLAT]
    br.ctop.dptk.many .cmploop
    ;;
}{.mfi
    // The loop ran to completion without a difference: maximum length.
    mov s_vlen=258
    nop.f 0
}{.mfi
    nop.f 0
    ;;
}
.mismatch:
{.mii
    // Per-byte equality mask of the differing words, then the index of its
    // first zero byte, which is added to the length.
    (p_no) pcmp1.eq s_tm2=s_tm7,s_tm8
    nop.i 0
    ;;
    (p_no) count s_tm1=s_tm2
    ;;
}{.mib
    (p_no) add s_vlen=s_vlen,s_tm1
    nop.i 0
    // Reset the rotating register base before leaving the loop context.
    clrrrb
    ;;
}
.only3:
{.mib
    // p_nbs: the length found is not greater than best_len.
    cmp.gt.unc p0,p_nbs=s_vlen,s_vbestlen
    (p_nbs) br.cond.dpnt.many .next_iter
    ;;
}{.mmi
    // New best: fetch nice_match and store cur_match to match_start.
    ld4 s_tm7=[s_anicematch]
    st4 [s_amatchstart]= s_vcurmatch
    add s_ascanend=s_ascan,s_vlen
    ;;
}{.mmi
    mova s_vbestlen=s_vlen
    // s_ascan was advanced by 3 during the probes; undo that here.
    add s_ascanend=-3,s_ascanend
    ;;
}{.mmi
    // Refresh the cached end bytes of scan for the new best_len.
    ld1 s_vscanend=[s_ascanend],-1
    add s_awinbest=s_awindow,s_vbestlen
    cmp.ne.unc p_bn2,p0=2,s_vbestlen
    ;;
}{.mib
    ld1.nt1 s_vscanend1=[s_ascanend],1
    // Keep searching while the new length is below nice_match.
    cmp.lt.unc p_nnc,p0=s_vlen,s_tm7
    (p_nnc) br.cond.dptk.many .next_iter
    ;;
}
.terminate:
{.mii
    nop.m 0
    // Restore the loop counter and predicate registers saved on entry.
    movi0 ar.lc=s_lcsave
    movi0 pr=s_prsave,-1
}{.mbb
    nop.m 0
    nop.b 0
    br.ret.sptk.many rp
    ;;
}
.endp
// match_init (IA-64): nothing to set up; clears ret0 and returns.
.global match_init
.proc match_init
match_init:
// ret0 = ret0 - ret0 = 0
sub ret0=ret0,ret0
br.ret.sptk.many rp
.endp
# else
/* None of the supported targets was detected: stop the build with a real
 * preprocessor diagnostic instead of a line that merely fails to assemble. */
#  error this asm version is for 386 or 680x0 or ia64 only
# endif /* __ia64__ */
#endif /* mc68020 || mc68000 */
#endif /* i386 */