#if (defined __i386__)
/* this assembly was 1st compiled from inffast.c (assuming POSTINC defined, OFF=0) and then hand optimized */
// Error message strings. The error paths further down compute the address of
// one of these position-independently and store it at 24(strm).
.cstring
LC0:
.ascii "invalid distance too far back\0"
LC1:
.ascii "invalid distance code\0"
LC2:
.ascii "invalid literal/length code\0"
.text
.align 4,0x90
// INFLATE_STRICT builds emit 10 zero bytes ahead of the entry point.
// NOTE(review): presumably padding that keeps later hot labels aligned -- confirm.
#ifdef INFLATE_STRICT
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
#endif
.globl _inflate_fast
/*
 * _inflate_fast (i386 build)
 *   8(%ebp)  = strm   (copied to %ebx and kept there)
 *   12(%ebp) = start
 * %edi carries the running bit count (bits). Every other working variable of
 * inffast.c lives in the stack frame and is reached through the macros below.
 * %edi/%esi/%ebx are pushed here and popped again before ret.
 *
 * NOTE(review): the frame slots chosen for in/out/end/whave/window/lcode/
 * lmask/len were reconstructed; any distinct unused slots in the 92-byte
 * frame are equivalent because they are private to this routine.
 */
_inflate_fast:
    // set up ebp to refer to arguments strm and start
    pushl   %ebp
    movl    %esp, %ebp

    // push edi/esi/ebx into stack
    pushl   %edi
    pushl   %esi
    pushl   %ebx

    // allocate for local variables 92-12=80, + 12 to align %esp to 16-byte boundary
    subl    $92, %esp
    movl    8(%ebp), %ebx

    /* definitions to help code readability */
#define bits        %edi
#define strm        %ebx
#define state       28(strm)        // state = (struct inflate_state FAR *)strm->state
#define in          -84(%ebp)       // in = strm->next_in - OFF
#define last        -80(%ebp)       // last = in + (strm->avail_in - 5)
#define out         -28(%ebp)       // out = strm->next_out - OFF
#define beg         -76(%ebp)       // beg = out - (start - strm->avail_out)
#define end         -72(%ebp)       // end = out + (strm->avail_out - 257)
#define wsize       -68(%ebp)       // wsize = state->wsize
#define whave       -64(%ebp)       // whave = state->whave
#define write       -60(%ebp)       // write = state->write
#define window      -56(%ebp)       // window = state->window
#define hold        -52(%ebp)       // hold = state->hold
#define lcode       -48(%ebp)       // lcode = state->lencode
#define dcode       -44(%ebp)       // dcode = state->distcode
#define lmask       -40(%ebp)       // lmask = (1U << state->lenbits) - 1
#define dmask       -36(%ebp)       // dmask = (1U << state->distbits) - 1
#define len         -32(%ebp)       // len
#define dmax        -20(%ebp)
#define dist        -16(%ebp)       // dist
#define write_wsize -24(%ebp)       // write+wsize
#define write_1     -88(%ebp)       // write-1
#define op          -92(%ebp)       // op

    movl    (strm), %eax            // strm->next_in
    movl    %eax, in                // in = strm->next_in - OFF
    subl    $5, %eax                // in - 5
    movl    4(strm), %ecx           // strm->avail_in
    addl    %ecx, %eax              // in + (strm->avail_in - 5)
    movl    %eax, last              // last = in + (strm->avail_in - 5)

    movl    12(strm), %esi          // strm->next_out
    movl    %esi, out               // out = strm->next_out - OFF
    movl    16(strm), %ecx          // strm->avail_out
    movl    %esi, %eax              // out
    subl    12(%ebp), %eax          // out - start
    addl    %ecx, %eax              // out - (start - strm->avail_out)
    movl    %eax, beg               // beg = out - (start - strm->avail_out)
    leal    -257(%esi,%ecx), %ecx   // out + (strm->avail_out - 257)
    movl    %ecx, end               // end = out + (strm->avail_out - 257)

    movl    state, %edx
#ifdef INFLATE_STRICT
    movl    20(%edx), %ecx          // state->dmax
    movl    %ecx, dmax              // dmax = state->dmax
#endif
    movl    40(%edx), %ecx          // state->wsize
    movl    %ecx, wsize             // wsize = state->wsize
    movl    44(%edx), %ecx          // state->whave
    movl    %ecx, whave             // whave = state->whave
    movl    48(%edx), %esi          // state->write
    movl    %esi, write             // write = state->write
    movl    52(%edx), %eax          // state->window
    movl    %eax, window            // window = state->window
    movl    56(%edx), %ecx          // state->hold
    movl    %ecx, hold              // hold = state->hold
    movl    60(%edx), bits          // bits = state->bits
    movl    76(%edx), %esi          // state->lencode
    movl    %esi, lcode             // lcode = state->lencode
    movl    80(%edx), %eax          // state->distcode
    movl    %eax, dcode             // dcode = state->distcode

    movl    84(%edx), %ecx          // state->lenbits
    movl    $1, %eax
    movl    %eax, %esi              // a copy of 1
    sall    %cl, %esi               // 1 << state->lenbits
    decl    %esi                    // (1U << state->lenbits) - 1
    movl    %esi, lmask             // lmask = (1U << state->lenbits) - 1
    movl    88(%edx), %ecx          // state->distbits
    sall    %cl, %eax               // 1 << state->distbits
    decl    %eax                    // (1U << state->distbits) - 1
    movl    %eax, dmask             // dmask = (1U << state->distbits) - 1

    // these 2 might be used often, precomputed and saved in stack
    movl    write, %eax
    addl    wsize, %eax
    movl    %eax, write_wsize       // write+wsize
    movl    write, %edx
    decl    %edx
    movl    %edx, write_1           // write-1
L_do_while_loop:                        // do {
    // Top of the decode loop: make sure at least 15 bits are in hold, then
    // look up the first-level literal/length code.
    cmpl    $15, bits
    jae     bits_ge_15                  //   if (bits < 15) {
#if 0
    leal    8(bits), %esi               // esi = bits+8
    movl    in, %eax                    // eax = in
    movzbl  (%eax), %edx                // edx = *in++
    movl    bits, %ecx                  // cl = bits
    sall    %cl, %edx                   // 1st *in << bits
    addl    hold, %edx                  // hold += 1st *in << bits
    movzbl  1(%eax), %eax               // 2nd *in
    movl    %esi, %ecx                  // cl = bits+8
    sall    %cl, %eax                   // 2nd *in << (bits+8)
    addl    %eax, %edx                  // hold += 2nd *in << (bits+8)
    movl    %edx, hold                  // update hold
    addl    $2, in                      // in += 2
    addl    $16, bits                   // bits += 16
#else
    /* from simulation, this code segment performs better than the other case
       possibly, we are more often hit with aligned memory access */
    movl    in, %ecx                    // unsigned short *inp = (unsigned short *) (in+OFF)
    movzwl  (%ecx), %eax                // *((unsigned short *) in)
    movl    bits, %ecx                  // bits
    sall    %cl, %eax                   // *((unsigned short *) in) << bits
    addl    %eax, hold                  // hold += (unsigned long) *((unsigned short *) in) << bits
    addl    $2, in                      // in += 2
    addl    $16, bits                   // bits += 16
#endif
bits_ge_15:                             //   }   /* bits < 15 */
    movl    hold, %eax                  // hold
    andl    lmask, %eax                 // hold & lmask
    movl    lcode, %esi                 // lcode[] : 4-byte aligned
    movl    (%esi,%eax,4), %eax         // this = lcode[hold&lmask]
    jmp     dolen

    .align 4,0x90
op_nonzero:
    // entered from dolen with this.op != 0: al = op, esi = this.val
    movzbl  %al, %ecx                   // a copy of op to cl
    testb   $16, %cl                    // if op&16
    jne     Llength_base                //   branch to length_base
    testb   $64, %cl                    // elif op&64
    jne     length_2nd_level_else       //   branch to 2nd level length code else conditions

    // 2nd level length code
    movl    $1, %eax
    sall    %cl, %eax                   // 1 << op
    decl    %eax                        // ((1<<op) - 1)
    andl    hold, %eax                  // hold & ((1U << op) - 1)
    movzwl  %si, %ecx                   // this.val
    addl    %ecx, %eax                  // this.val + (hold & ((1U << op) - 1))
    movl    lcode, %ecx                 // lcode[] : 4-byte aligned
    movl    (%ecx,%eax,4), %eax         // this = lcode[this.val + (hold & ((1U << op) - 1))]

dolen:
    // eax = this : val in bits 31..16, bits in ah, op in al
    movl    %eax, %esi                  // make a copy of this (val 16-bit, bits 8-bit, op 8-bit)
    shrl    $16, %esi                   // %esi = this.val
    movzbl  %ah, %ecx                   // op = (unsigned)(this.bits)
    shrl    %cl, hold                   // hold >>= op
    subl    %ecx, bits                  // bits -= op
    testb   %al, %al                    // op = (unsigned)(this.op)
    jne     op_nonzero                  // if op != 0, branch to op_nonzero

    // op == 0 : literal
    movl    %esi, %ecx                  // this.val
    movl    out, %eax                   // out
    movb    %cl, (%eax)                 // PUP(out) = (unsigned char)(this.val)
    incl    %eax                        // out++
    movl    %eax, out                   // save out

L_tst_do_while_loop_end:
    movl    last, %eax                  // last
    cmpl    %eax, in                    // in vs last
    jae     return_unused_bytes         // branch to return_unused_bytes if in >= last
    movl    end, %edx                   // end
    cmpl    %edx, out                   // out vs end
    jb      L_do_while_loop             // branch to do loop if out < end
return_unused_bytes:
    // Give whole unused bytes in hold back to the input, write the stream
    // pointers/counters and hold/bits back to strm and state, then return.
    movl    bits, %eax                  // bits
    shrl    $3, %eax                    // len = bits >> 3
    movl    in, %edx                    // in
    subl    %eax, %edx                  // in -= len
    sall    $3, %eax                    // len << 3
    movl    bits, %ecx                  // bits
    subl    %eax, %ecx                  // bits -= len << 3   (new bits kept in ecx)

    movl    %edx, (strm)                // strm->next_in = in + OFF
    movl    out, %eax                   // out
    movl    %eax, 12(strm)              // strm->next_out = out + OFF

    cmpl    %edx, last                  // last vs in
    jbe     L67                         // if (last <= in) branch to L67 and return to L69
    movl    last, %eax                  // last
    addl    $5, %eax                    // 5 + last
    subl    %edx, %eax                  // 5 + last - in
L69:
    movl    %eax, 4(strm)               // update strm->avail_in

    movl    end, %eax
    cmpl    %eax, out                   // out vs end
    jae     L70                         // if (out>=end) branch to L70, and return to L72
    addl    $257, %eax                  // 257 + end
    subl    out, %eax                   // 257 + end - out
L72:
    movl    %eax, 16(strm)              // update strm->avail_out

    movl    $1, %eax
    sall    %cl, %eax                   // 1 << bits
    decl    %eax                        // (1 << bits) -1
    andl    hold, %eax                  // hold &= (1U << bits) - 1
    movl    state, %esi                 // state
    movl    %eax, 56(%esi)              // state->hold = hold
    movl    %ecx, 60(%esi)              // state->bits = bits

    addl    $92, %esp                   // pop out local from stack

    // restore saved registers and return
    popl    %ebx
    popl    %esi
    popl    %edi
    leave
    ret
// this code segment is branched in from op_nonzero, with op in cl and this.value in esi
Llength_base:
    movzwl  %si, %esi                   // this instruction might not be needed, pad here to give better performance
    movl    %esi, len                   // len = (unsigned)(this.val)
    movl    %ecx, %esi                  // leave a copy of op at ecx
    andl    $15, %esi                   // op&=15
    je      Lop_is_zero                 // if (op) {   (no extra bits: nothing to add to len)
    cmpl    bits, %esi                  // op vs bits
    jbe     Lop_be_bits                 //   if (bits < op) {
    movl    in, %edx                    // in
    movzbl  (%edx), %eax                // *in
    movl    bits, %ecx                  // bits
    sall    %cl, %eax                   // *in << bits
    addl    %eax, hold                  // hold += (unsigned long)(PUP(in)) << bits
    incl    %edx                        // in++
    movl    %edx, in                    // update in
    addl    $8, bits                    // bits += 8
Lop_be_bits:                            //   }
    movl    $1, %eax                    // 1
    movl    %esi, %ecx                  // op
    sall    %cl, %eax                   // 1 << op
    decl    %eax                        // (1<<op)-1
    andl    hold, %eax                  // hold & ((1U << op) - 1)
    addl    %eax, len                   // len += (unsigned)hold & ((1U << op) - 1)
    shrl    %cl, hold                   // hold >>= op
    subl    %esi, bits                  // bits -= op
Lop_is_zero:                            // }
    cmpl    $14, bits                   // if (bits < 15) {
    jbe     bits_le_14                  //   branch to refill 16-bit into hold, and branch back to next
L19:                                    // }
    movl    hold, %eax                  // hold
    andl    dmask, %eax                 // hold&dmask
    movl    dcode, %esi                 // dcode[] : 4-byte aligned
    movl    (%esi,%eax,4), %eax         // this = dcode[hold & dmask]
    jmp     dodist

Lop_16_zero:
    // (op&16)==0 : cl = op, edx = this.val (both set by dodist below)
    testb   $64, %cl                    // op&64
    jne     Linvalid_distance_code      // if (op&64)!=0, branch to invalid distance code
    movl    $1, %eax                    // 1
    sall    %cl, %eax                   // (1<<op)
    decl    %eax                        // (1<<op)-1
    andl    hold, %eax                  // (hold & ((1U << op) - 1))
    movzwl  %dx, %edx                   // this.val
    addl    %edx, %eax                  // this.val + (hold & ((1U << op) - 1))
    movl    dcode, %edx                 // dcode[] : 4 byte aligned
    movl    (%edx,%eax,4), %eax         // this = dcode[this.val + (hold & ((1U << op) - 1))]
dodist:
    movl    %eax, %edx                  // this : (val 16-bit, bits 8-bit, op 8-bit)
    shrl    $16, %edx                   // edx = this.val
    movzbl  %ah, %ecx                   // op = (unsigned)(this.bits)
    shrl    %cl, hold                   // hold >>= op
    subl    %ecx, bits                  // bits -= op
    movzbl  %al, %ecx                   // op = (unsigned)(this.op)
    testb   $16, %cl                    // op & 16
    je      Lop_16_zero                 // if (op&16)==0 goto test op&64
Ldistance_base:                         // if (op&16) {   /* distance base */
    // on entry: cl = this.op, edx = this.val
    andl    $15, %ecx                   // op &= 15
    movl    %ecx, op                    // keep op on the stack, cl is needed for shifts
    cmpl    bits, %ecx                  // op vs bits
    jbe     0f                          // if (bits < op) {
    movl    in, %ecx                    // in
    movzbl  (%ecx), %eax                // *in
    movl    bits, %ecx                  // bits
    sall    %cl, %eax                   // *in << bits
    addl    %eax, hold                  // hold += (unsigned long)(PUP(in)) << bits
    incl    in                          // in++
    addl    $8, bits                    // bits += 8
    cmpl    bits, op                    // op vs bits
    jbe     0f                          //   if (bits < op) {
    movl    in, %esi                    // in
    movzbl  (%esi), %eax                // *in
    movl    bits, %ecx                  // cl = bits
    sall    %cl, %eax                   // *in << bits
    addl    %eax, hold                  // hold += (unsigned long)(PUP(in)) << bits
    incl    %esi                        // in++
    movl    %esi, in                    // update in
    addl    $8, bits                    // bits += 8
0:                                      // } }
    movzwl  %dx, %edx                   // dist = (unsigned)(this.val)
    movl    $1, %eax                    // 1
    movzbl  op, %ecx                    // cl = op
    sall    %cl, %eax                   // 1 << op
    decl    %eax                        // ((1U << op) - 1)
    andl    hold, %eax                  // (unsigned)hold & ((1U << op) - 1)
    addl    %edx, %eax                  // dist += (unsigned)hold & ((1U << op) - 1)
#ifdef INFLATE_STRICT
    cmpl    dmax, %eax                  // dist vs dmax
    ja      Linvalid_distance_too_far_back  // if (dist > dmax) break for invalid distance too far back
#endif
    movl    %eax, dist                  // save a copy of dist in stack
    shrl    %cl, hold                   // hold >>= op
    subl    %ecx, bits                  // bits -= op
    movl    out, %eax
    subl    beg, %eax                   // eax = op = out - beg
    cmpl    %eax, dist                  // dist vs op
    jbe     Lcopy_direct_from_output    // if (dist <= op) branch to copy direct from output

    // if (dist > op) {
    movl    dist, %ecx                  // dist
    subl    %eax, %ecx                  // ecx = op = dist - op
    cmpl    %ecx, whave                 // whave vs op
    jb      Linvalid_distance_too_far_back  // if (op > whave) break for error
    movl    write, %edx
    testl   %edx, %edx
    jne     Lwrite_non_zero             // if (write==0) {
    movl    wsize, %eax                 // wsize
    subl    %ecx, %eax                  // wsize-op
    movl    window, %esi                // from=window-OFF
    addl    %eax, %esi                  // from += wsize-op
    movl    out, %edx                   // out
    cmpl    %ecx, len                   // len vs op
    jbe     L38                         // if !(op < len) skip
    subl    %ecx, len                   // len - op
0:                                      // do {
    movzbl  (%esi), %eax                //
    movb    %al, (%edx)                 //
    incl    %edx                        //
    incl    %esi                        //   PUP(out) = PUP(from)
    decl    %ecx                        //   --op
    jne     0b                          // } while (op)
    movl    %edx, out                   // update out
    movl    %edx, %esi                  // out
    subl    dist, %esi                  // esi = from = out - dist

L38:                                    /* copy from output */
    // on entry: esi = from; len and out are reloaded from the stack
    // while (len > 2) {
    //   PUP(out) = PUP(from)
    //   PUP(out) = PUP(from)
    //   PUP(out) = PUP(from)
    //   len -= 3
    // }
    // if (len) {
    //   PUP(out) = PUP(from)
    //   if (len > 1)
    //     PUP(out) = PUP(from)
    // }
    movl    len, %ecx                   // len
    movl    out, %edx                   // out
    subl    $3, %ecx                    // pre-decrement len by 3
    jl      1f                          // if len < 3, branch to 1f for remaining processing
0:                                      // while (len>2) {
    movzbl  (%esi), %eax
    movb    %al, (%edx)                 // PUP(out) = PUP(from)
    movzbl  1(%esi), %eax
    movb    %al, 1(%edx)                // PUP(out) = PUP(from)
    movzbl  2(%esi), %eax
    movb    %al, 2(%edx)                // PUP(out) = PUP(from)
    addl    $3, %esi                    // from += 3
    addl    $3, %edx                    // out += 3
    subl    $3, %ecx                    // len -= 3
    jge     0b                          // }
    movl    %edx, out                   // update out, in case len == 0
1:
    addl    $3, %ecx                    // post-increment len by 3
    je      L_tst_do_while_loop_end     // if (len) {
    movzbl  (%esi), %eax                //
    movb    %al, (%edx)                 // PUP(out) = PUP(from)
    incl    %edx                        // out++
    movl    %edx, out                   // update out, in case len == 1
    cmpl    $2, %ecx                    //
    jne     L_tst_do_while_loop_end     // if len==1, break
    movzbl  1(%esi), %eax
    movb    %al, (%edx)                 // PUP(out) = PUP(from)
    incl    %edx                        // out++
    movl    %edx, out                   // update out
    jmp     L_tst_do_while_loop_end     // }
.align 4,0x90
length_2nd_level_else:
    // reached from op_nonzero with (op&16)==0 and (op&64)!=0, op in ecx
    andl    $32, %ecx                   // test end-of-block
    je      invalid_literal_length_code // if (op&32)==0, branch for invalid literal/length code break
    movl    state, %edx                 // if (op&32), end-of-block is detected
    movl    $11, (%edx)                 // state->mode = TYPE
    jmp     return_unused_bytes

L70:
    // out >= end : avail_out = 257 - (out - end)
    movl    out, %edx                   // out
    subl    %edx, end                   // (end-out)
    movl    end, %esi                   // %esi = (end-out) = -(out - end)
    leal    257(%esi), %eax             // %eax = 257 + %esi = 257 - (out - end)
    jmp     L72                         // return to update state and return

L67:                                    // %edx = in, to return 5 - (in - last) in %eax
    subl    %edx, last                  // last - in
    movl    last, %edx                  // %edx = last - in = - (in - last)
    leal    5(%edx), %eax               // %eax = 5 + %edx = 5 - (in - last)
    jmp     L69                         // return to update state and return
bits_le_14:
    // Refill 16 bits into hold before the distance code lookup, then resume at L19.
#if 1
    leal    8(bits), %esi               // esi = bits+8
    movl    in, %eax                    // eax = in
    movzbl  (%eax), %edx                // edx = *in++
    movl    bits, %ecx                  // cl = bits
    sall    %cl, %edx                   // 1st *in << bits
    addl    hold, %edx                  // hold += 1st *in << bits
    movzbl  1(%eax), %eax               // 2nd *in
    movl    %esi, %ecx                  // cl = bits+8
    sall    %cl, %eax                   // 2nd *in << (bits+8)
    addl    %eax, %edx                  // hold += 2nd *in << (bits+8)
    movl    %edx, hold                  // update hold
    addl    $2, in                      // in += 2
    addl    $16, bits                   // bits += 16
    jmp     L19
#else
    /* this code segment does not run as fast as the other original code segment, possibly the processor
       need extra time to handle unaligned short access */
    movl    in, %edx                    // unsigned short *inp = (unsigned short *) (in+OFF)
    movzwl  (%edx), %eax                // *((unsigned short *) in)
    movl    bits, %ecx                  // bits
    sall    %cl, %eax                   // *((unsigned short *) in) << bits
    addl    %eax, hold                  // hold += (unsigned long) *((unsigned short *) in) << bits
    addl    $2, %edx                    // in += 2
    addl    $16, %ecx                   // bits += 16
    movl    %edx, in
    movl    %ecx, bits
    jmp     L19
#endif
// Error exit: store the address of LC2 ("invalid literal/length code") at
// 24(strm), write 27 to the first word of *state, then take the common exit.
// call/pop fetches the current pc so the string address is formed pc-relatively.
invalid_literal_length_code:
call 0f
0: popl %eax
leal LC2-0b(%eax), %eax
movl %eax, 24(strm)
movl state, %esi
movl $27, (%esi)
jmp return_unused_bytes
// Same pattern for LC1 ("invalid distance code").
Linvalid_distance_code:
call 0f
0: popl %eax
leal LC1-0b(%eax), %eax
movl %eax, 24(strm)
movl state, %eax
movl $27, (%eax)
jmp return_unused_bytes
// INFLATE_STRICT builds realign and emit 9 zero bytes here.
// NOTE(review): presumably padding so the following copy loop lands on a
// favourable boundary -- confirm.
#ifdef INFLATE_STRICT
.align 4,0x90
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
#endif
Lcopy_direct_from_output:
    // dist <= out - beg : the whole match is in the output buffer.
    // from = out - dist; copy 3 bytes per iteration, then 0..2 tail bytes.
    movl    out, %edx                   // out
    subl    dist, %edx                  // from = out - dist
    movl    out, %ecx                   // out
    movl    len, %esi                   // len
    subl    $3, %esi                    // pre-decrement len by 3
0:                                      // do {
    movzbl  (%edx), %eax
    movb    %al, (%ecx)                 //   PUP(out) = PUP(from)
    movzbl  1(%edx), %eax
    movb    %al, 1(%ecx)                //   PUP(out) = PUP(from)
    movzbl  2(%edx), %eax
    movb    %al, 2(%ecx)                //   PUP(out) = PUP(from)
    addl    $3, %edx                    //   from += 3
    addl    $3, %ecx                    //   out += 3
    subl    $3, %esi                    //   len -= 3
    jge     0b                          // } while (len > 2)
    movl    %ecx, out                   // update out, in case len == 0
    addl    $3, %esi                    // post-increment len by 3
    je      L_tst_do_while_loop_end     // if (len) {
    movzbl  (%edx), %eax
    movb    %al, (%ecx)                 //   PUP(out) = PUP(from)
    incl    %ecx
    movl    %ecx, out                   //   out++
    cmpl    $2, %esi                    //
    jne     L_tst_do_while_loop_end     //   if (len > 1)
    movzbl  1(%edx), %eax
    movb    %al, (%ecx)                 //     PUP(out) = PUP(from)
    incl    %ecx
    movl    %ecx, out                   //     out++
    jmp     L_tst_do_while_loop_end     // }
.align 4,0x90
Lwrite_non_zero:                        // %edx = write, %ecx = op
    movl    window, %esi                // from = window - OFF
    cmpl    %ecx, %edx                  // write vs op
    jae     Lcontiguous_in_window       // if (write >= op) branch to contiguous in window

Lwrap_around_window:                    // wrap around window
    addl    write_wsize, %esi           // from += write+wsize
    subl    %ecx, %esi                  // from += wsize + write - op
    subl    %edx, %ecx                  // op -= write
    cmpl    %ecx, len                   // len vs op
    jbe     L38                         // if (len <= op) break to copy from output
    subl    %ecx, len                   // len -= op
    movl    out, %edx                   // out
0:                                      // do {
    movzbl  (%esi), %eax                //   *from
    movb    %al, (%edx)                 //   *out
    incl    %esi                        //   from++
    incl    %edx                        //   out++
    decl    %ecx                        //   --op
    jne     0b                          // } while (op)
    movl    %edx, out                   // save out in case we need to break to L38
    movl    window, %esi                // from = window - OFF
    movl    len, %eax                   // len
    cmpl    %eax, write                 // write vs len
    jae     L38                         // if (write >= len) break to L38
    movl    write, %ecx                 // op = write
    subl    %ecx, len                   // len -= op
0:                                      // do {
    movzbl  (%esi), %eax                //   *from
    movb    %al, (%edx)                 //   *out
    incl    %esi                        //   from++
    incl    %edx                        //   out++
    decl    %ecx                        //   --op
    jne     0b                          // } while (op)
    movl    %edx, %esi                  // from = out
    movl    %edx, out                   // save a copy of out
    subl    dist, %esi                  // from = out - dist
    jmp     L38                         // copy the rest from output

Lcontiguous_in_window:                  // contiguous in window, edx = write, %ecx = op
    subl    %ecx, %edx                  // write - op
    addl    %edx, %esi                  // from += write - op
    cmpl    %ecx, len                   // len vs op
    jbe     L38                         // if (len <= op) break to copy from output
    movl    out, %edx                   // out
    subl    %ecx, len                   // len -= op
0:                                      // do {
    movzbl  (%esi), %eax                //   *from
    movb    %al, (%edx)                 //   *out
    incl    %esi                        //   from++
    incl    %edx                        //   out++
    decl    %ecx                        //   op--
    jne     0b                          // } while (op)
    movl    %edx, out                   // update out
    movl    %edx, %esi                  // from = out
    subl    dist, %esi                  // from = out - dist
    jmp     L38                         // copy the rest from output
// Error exit: store the address of LC0 ("invalid distance too far back") at
// 24(strm), write 27 to the first word of *state, then take the common exit.
// call/pop fetches the current pc so the string address is formed pc-relatively.
Linvalid_distance_too_far_back:
call 0f
0: popl %eax
leal LC0-0b(%eax), %eax
movl %eax, 24(strm)
movl state, %ecx
movl $27, (%ecx)
jmp return_unused_bytes
// end of the __i386__ implementation
#endif
#if (defined __x86_64__)
// Error message strings. The error paths further down load the address of one
// of these rip-relatively and store it at 48(strm).
.cstring
LC0:
.ascii "invalid distance too far back\0"
LC1:
.ascii "invalid distance code\0"
LC2:
.ascii "invalid literal/length code\0"
.text
.align 4,0x90
// INFLATE_STRICT builds emit 12 zero bytes ahead of the entry point.
// NOTE(review): presumably padding that keeps later hot labels aligned -- confirm.
#ifdef INFLATE_STRICT
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
#endif
.globl _inflate_fast
/*
 * _inflate_fast (x86_64 build)
 *   %rdi = strm   (moved to %r13; %rdi is then reused as state)
 *   %esi = start
 * Register and stack-slot roles are given by the macros below.
 * %r12-%r15 and %rbx are pushed here and popped again before ret.
 */
_inflate_fast:
    // set up rbp
    pushq   %rbp
    movq    %rsp, %rbp

    // save registers in stack
    pushq   %r15
    pushq   %r14
    pushq   %r13
    pushq   %r12
    pushq   %rbx

#define strm        %r13
#define state       %rdi
#define in          %r12
#define in_d        %r12d
#define out         %r10
#define out_d       %r10d
#define write       %r15d
#define hold        %r9
#define holdd       %r9d
#define bits        %r8d
#define lcode       %r14
#define len         %ebx
#define from        %rcx
#define dmax        %r11d
#define last        -104(%rbp)
#define beg         -96(%rbp)
#define end         -88(%rbp)
#define wsize       -80(%rbp)
#define whave       -76(%rbp)
#define window      -72(%rbp)
#define dcode       -64(%rbp)
#define lmask       -56(%rbp)
#define dmask       -112(%rbp)
#define wsize_write -116(%rbp)
#define write_1     -128(%rbp)
#define dist        -44(%rbp)

    // reserve stack memory for local variables 128-40=88
    subq    $88, %rsp

    movq    %rdi, strm
    movq    56(%rdi), state             // state = (struct inflate_state FAR *)strm->state
    movq    (strm), in                  // in = strm->next_in
    movl    8(strm), %eax               // strm->avail_in
    subl    $5, %eax                    // (strm->avail_in - 5)
    addq    in, %rax                    // in + (strm->avail_in - 5)
    movq    %rax, last                  // last = in + (strm->avail_in - 5)

    movq    24(strm), out               // out = strm->next_out
    movl    32(strm), %eax              // strm->avail_out
    subl    %eax, %esi                  // (start - strm->avail_out)
    movq    out, %rdx                   // out
    subq    %rsi, %rdx                  // out - (start - strm->avail_out)
    movq    %rdx, beg                   // beg = out - (start - strm->avail_out)
    subl    $257, %eax                  // (strm->avail_out - 257)
    addq    out, %rax                   // out + (strm->avail_out - 257)
    movq    %rax, end                   // end = out + (strm->avail_out - 257)

#ifdef INFLATE_STRICT
    movl    20(state), dmax             // dmax = state->dmax
#endif
    movl    52(state), %ecx             // state->wsize
    movl    %ecx, wsize                 // wsize = state->wsize
    movl    56(state), %ebx             // state->whave
    movl    %ebx, whave                 // whave = state->whave
    movl    60(state), write            // write = state->write
    movq    64(state), %rax             // state->window
    movq    %rax, window                // window = state->window
    movq    72(state), hold             // hold = state->hold
    movl    80(state), bits             // bits = state->bits
    movq    96(state), lcode            // lcode = state->lencode
    movq    104(state), %rdx            // state->distcode
    movq    %rdx, dcode                 // dcode = state->distcode

    movl    116(state), %ecx            // state->distbits
    movl    $1, %eax
    movl    %eax, %edx                  // 1
    sall    %cl, %edx                   // (1U << state->distbits)
    movl    112(state), %ecx            // state->lenbits
    sall    %cl, %eax                   // (1U << state->lenbits)
    decl    %eax                        // (1U << state->lenbits) - 1
    movq    %rax, lmask                 // lmask = (1U << state->lenbits) - 1
    decl    %edx                        // (1U << state->distbits) - 1
    movq    %rdx, dmask                 // dmask = (1U << state->distbits) - 1

    movl    wsize, %ecx                 // wsize
    addl    write, %ecx                 // wsize + write
    movl    %ecx, wsize_write           // wsize_write = wsize + write
    leal    -1(%r15), %ebx              // write - 1
    movq    %rbx, write_1               // write_1 = write - 1
L_do_while_loop:
    // Top of the decode loop: make sure at least 15 bits are in hold, then
    // look up the first-level literal/length code.
    cmpl    $14, bits                   // bits vs 14
    ja      0f                          // if (bits < 15) {
    movzwl  (in), %eax                  //   read 2 bytes from in
    movl    bits, %ecx                  //   set up cl = bits
    salq    %cl, %rax                   //   (*in) << bits
    addq    %rax, hold                  //   hold += (*in) << bits
    addq    $2, in                      //   in += 2
    addl    $16, bits                   //   bits += 16
0:                                      // }
    movq    lmask, %rax                 // lmask
    andq    hold, %rax                  // hold & lmask
    jmp     1f                          // go load this = lcode[hold & lmask]

    .align 4,0x90
Lop_nonzero:
    // entered from dolen with this.op != 0: al = op, edx = this.val
    movzbl  %al, %ecx                   // op in al and cl
    testb   $16, %cl                    // check for length base processing (op&16)
    jne     L_length_base               // if (op&16) branch to length base processing
    testb   $64, %cl                    // check for 2nd level length code (op&64==0)
    jne     L_end_of_block              // if (op&64)!=0, branch for end-of-block processing

    /* 2nd level length code : (op&64) == 0*/
L_2nd_level_length_code:
    movl    $1, %eax                    // 1
    sall    %cl, %eax                   // 1 << op
    decl    %eax                        // ((1U << op) - 1)
    andq    hold, %rax                  // (hold & ((1U << op) - 1))
    movzwl  %dx, %edx
    addq    %rdx, %rax                  // index = this.val + (hold & ((1U << op) - 1))
1:
    // rax = table index, from either the first-level or the 2nd-level path
    movl    (lcode,%rax,4), %eax        // this = lcode[index]
    // dolen:
    movl    %eax, %edx                  // a copy of this
    shrl    $16, %edx                   // edx = this.val
    movzbl  %ah, %ecx                   // op = (unsigned)(this.bits)
    shrq    %cl, hold                   // hold >>= op
    subl    %ecx, bits                  // bits -= op
    testb   %al, %al                    // op = (unsigned)(this.op)
    jne     Lop_nonzero                 // if op != 0, branch to Lop_nonzero
L_literal:
    movb    %dl, (out)                  // *out = this.val
    incq    out                         // out ++
L_do_while_loop_check:
    cmpq    last, in                    // in vs last
    jae     L_return_unused_byte        // if in >= last, break to return unused byte processing
    cmpq    end, out                    // out vs end
    jb      L_do_while_loop             // back to do_while_loop if out < end

    /* return unused bytes (on entry, bits < 8, so in will not go too far back) */
L_return_unused_byte:
    movl    out_d, %esi                 // L34 expects the low 32 bits of out in esi
    jmp     L34
L_length_base:                          /* al = cl = op, edx = this.val, op&16 = 16 */
    movzwl  %dx, len                    // len = (unsigned)(this.val)
    movl    %ecx, %edx                  // op
    andl    $15, %edx                   // op &= 15
    je      1f                          // if (op) {
    cmpl    bits, %edx                  // op vs bits
    jbe     0f                          //   if (bits < op) {
    movzbl  (in), %eax                  // *in
    movl    bits, %ecx                  // cl = bits
    salq    %cl, %rax                   // *in << bits
    addq    %rax, hold                  // hold += (unsigned long)(PUP(in)) << bits
    incq    in                          // in++
    addl    $8, bits                    // bits += 8
0:                                      //   }
    movl    $1, %eax                    // 1
    movl    %edx, %ecx                  // cl = op
    sall    %cl, %eax                   // 1 << op
    decl    %eax                        // (1 << op) - 1
    andl    holdd, %eax                 // (unsigned)hold & ((1U << op) - 1)
    addl    %eax, len                   // len += (unsigned)hold & ((1U << op) - 1)
    shrq    %cl, hold                   // hold >>= op
    subl    %edx, bits                  // bits -= op
1:                                      // }
    cmpl    $14, bits                   // bits vs 14
    jbe     L99                         // if (bits < 15) go to loading to hold and return to L19
L19:                                    // }
    movq    dmask, %rax                 // dmask
    andq    hold, %rax                  // hold & dmask
    movq    dcode, %rdx                 // dcode[]
    movl    (%rdx,%rax,4), %eax         // this = dcode[hold & dmask]
    jmp     L_dodist

    .align 4,0x90
0:  // op&16 == 0, test (op&64)==0 for 2nd level distance code
    testb   $64, %cl                    // op&64
    jne     L_invalid_distance_code     // if ((op&64)==0) {   /* 2nd level distance code */
    movl    $1, %eax                    // 1
    sall    %cl, %eax                   // 1 << op
    decl    %eax                        // (1 << op) - 1
    andq    hold, %rax                  // (hold & ((1U << op) - 1))
    movzwl  %dx, %edx                   // this.val
    addq    %rdx, %rax                  // this.val + (hold & ((1U << op) - 1))
    movq    dcode, %rcx                 // dcode[]
    movl    (%rcx,%rax,4), %eax         // this = dcode[this.val + (hold & ((1U << op) - 1))]
L_dodist:
    movl    %eax, %edx                  // this
    shrl    $16, %edx                   // dist = (unsigned)(this.val)
    movzbl  %ah, %ecx                   // op = (unsigned)(this.bits)
    shrq    %cl, hold                   // hold >>= op
    subl    %ecx, bits                  // bits -= op
    movzbl  %al, %ecx                   // op = (unsigned)(this.op)
    testb   $16, %cl                    // op & 16
    je      0b                          // if (op&16) == 0, branch to check for 2nd level distance code

L_distance_base:                        /* distance base */
    movl    %ecx, %esi                  // op
    andl    $15, %esi                   // op&=15
    cmpl    bits, %esi                  // op vs bits
    jbe     1f                          // if (bits < op) {
    movzbl  (in), %eax                  // *in
    movl    bits, %ecx                  // cl = bits
    salq    %cl, %rax                   // *in << bits
    addq    %rax, hold                  // hold += (unsigned long)(PUP(in)) << bits
    incq    in                          // in++
    addl    $8, bits                    // bits += 8
    cmpl    bits, %esi                  // op vs bits
    jbe     1f                          //   if (bits < op) {
    movzbl  (in), %eax                  // *in
    movl    bits, %ecx                  // cl = bits
    salq    %cl, %rax                   // *in << bits
    addq    %rax, hold                  // hold += (unsigned long)(PUP(in)) << bits
    incq    in                          // in++
    addl    $8, bits                    // bits += 8
1:                                      // } }
    movzwl  %dx, %edx                   // dist
    movl    $1, %eax                    // 1
    movl    %esi, %ecx                  // cl = op
    sall    %cl, %eax                   // (1 << op)
    decl    %eax                        // (1 << op) - 1
    andl    holdd, %eax                 // (unsigned)hold & ((1U << op) - 1)
    addl    %edx, %eax                  // dist += (unsigned)hold & ((1U << op) - 1)
    movl    %eax, dist                  // save dist in stack
#ifdef INFLATE_STRICT
    cmp     %eax, dmax                  // dmax vs dist
    jb      L_invalid_distance_too_far_back // if (dmax < dist) break for invalid distance too far back
#endif
    shrq    %cl, hold                   // hold >>= op
    subl    %esi, bits                  // bits -= op
    movl    out_d, %esi                 // out
    movl    out_d, %eax                 // out
    subl    beg, %eax                   // op = out - beg
    cmpl    %eax, dist                  // dist vs op, /* see if copy from window */
    jbe     L_copy_direct_from_output   // if (dist <= op) branch to copy direct from output
L_distance_back_in_window:
    // dist > out - beg : part of the match comes from the sliding window
    movl    dist, %edx                  // dist
    subl    %eax, %edx                  // op = dist - op
    cmpl    %edx, whave                 // whave vs op
    jb      L_invalid_distance_too_far_back // if (op > whave), break for invalid distance too far back
    testl   write, write                // if (write!=0)
    jne     L_wrap_around_window        //   branch to wrap around window

L_very_common_case:
    movl    wsize, %eax                 // wsize
    subl    %edx, %eax                  // wsize - op
    movq    window, from                // from = window - OFF
    addq    %rax, from                  // from += wsize - op
    movl    %edx, %esi                  // op
    cmpl    %edx, len                   // len vs op
    ja      L_some_from_window          // if (len > op), branch for aligned code block L_some_from_window

L38:
    // copy len bytes from from to out: 3 per iteration, then 0..2 tail bytes
    subl    $3, len                     // pre-decrement len by 3
    jge     0f                          // if len >= 3, branch to the aligned code block
1:  addl    $3, len                     // post-increment len by 3
    je      L_do_while_loop_check       // if (len==0) break to L_do_while_loop_check
    movzbl  (from), %eax                // *from
    movb    %al, (out)                  // *out
    incq    out                         // out++
    cmpl    $2, len                     // len vs 2
    jne     L_do_while_loop_check       // if len!=2 break to L_do_while_loop_check
    movzbl  1(from), %eax               // *from
    movb    %al, (out)                  // *out
    incq    out                         // out++
    jmp     L_do_while_loop_check       // break to L_do_while_loop_check

    .align 4,0x90
0:                                      // do {
    movzbl  (from), %eax                // *from
    movb    %al, (out)                  // *out
    movzbl  1(from), %eax               // *from
    movb    %al, 1(out)                 // *out
    movzbl  2(from), %eax               // *from
    movb    %al, 2(out)                 // *out
    addq    $3, out                     // out += 3
    addq    $3, from                    // from += 3
    subl    $3, len                     // len -= 3
    jge     0b                          // } while (len>=0)
    jmp     1b                          // handle the 0..2 remaining bytes (must not fall through)
.align 4,0x90
L_end_of_block:
    // reached from Lop_nonzero with (op&16)==0 and (op&64)!=0, op in ecx
    andl    $32, %ecx                   // op & 32
    jne     L101                        // if (op&32) branch to end-of-block break
    leaq    LC2(%rip), from
    movq    from, 48(strm)              // strm->msg = "invalid literal/length code"
    movl    $27, (state)                // state->mode = BAD
    movl    out_d, %esi                 // L34 expects the low 32 bits of out in esi

L34:
    // Common exit. esi = low 32 bits of out. Give whole unused bytes in hold
    // back to the input, write pointers/counters and hold/bits back, return.
    movl    bits, %eax                  // bits
    shrl    $3, %eax                    // len = bits >> 3
    movl    %eax, %edx                  // len (zero-extended into rdx)
    subq    %rdx, in                    // in -= len
    sall    $3, %eax                    // len << 3
    movl    bits, %ecx                  // bits
    subl    %eax, %ecx                  // bits -= len << 3   (new bits kept in ecx)

    movq    in, (strm)                  // strm->next_in = in + OFF
    movq    out, 24(strm)               // strm->next_out = out + OFF

    cmpq    in, last                    // last vs in
    jbe     L67                         // if (last <= in) branch to L67 and return to L69
    movl    last, %eax                  // last
    addl    $5, %eax                    // last + 5
    subl    in_d, %eax                  // 5 + last - in
L69:
    movl    %eax, 8(strm)               // update strm->avail_in

    cmpq    end, out                    // out vs end
    jae     L70                         // if out >= end branch to L70 and return to L72
    movl    end, %eax                   // end
    addl    $257, %eax                  // 257 + end
    subl    %esi, %eax                  // 257 + end - out
L72:
    movl    %eax, 32(strm)              // update strm->avail_out

    movl    $1, %eax                    // 1
    sall    %cl, %eax                   // 1 << bits
    decl    %eax                        // (1U << bits) - 1
    andq    hold, %rax                  // hold &= (1U << bits) - 1
    movq    %rax, 72(state)             // state->hold = hold
    movl    %ecx, 80(state)             // state->bits = bits

    // clear stack memory for local variables
    addq    $88, %rsp

    // restore registers from stack
    popq    %rbx
    popq    %r12
    popq    %r13
    popq    %r14
    popq    %r15

    // return to caller
    leave
    ret
.align 4,0x90
L99:
    // Refill 16 bits into hold before the distance code lookup, then resume at L19.
    leal    8(bits), %esi               // esi = bits+8
    movzbl  (in), %edx                  // 1st *in
    movl    bits, %ecx                  // cl = bits
    salq    %cl, %rdx                   // 1st *in << bits
    addq    %rdx, hold                  // 1st hold += (unsigned long)(PUP(in)) << bits
    movzbl  1(in), %eax                 // 2nd *in
    movl    %esi, %ecx                  // cl = bits + 8
    salq    %cl, %rax                   // 2nd *in << bits+8
    addq    %rax, hold                  // 2nd hold += (unsigned long)(PUP(in)) << bits
    addq    $2, in                      // in += 2
    addl    $16, bits                   // bits += 16
    jmp     L19

L101:
    // end-of-block code: write 11 to state->mode and leave through the common exit
    movl    $11, (state)
    movl    out_d, %esi
    jmp     L34

    .align 4,0x90
L70:
    // out >= end : avail_out = 257 + end - out, computed in 32 bits
    movl    end, %eax                   // end
    subl    %esi, %eax                  // end - out
    addl    $257, %eax                  // 257 + end - out
    jmp     L72

    .align 4,0x90
L67:
    // last <= in : avail_in = 5 + last - in, computed in 32 bits
    movl    last, %eax                  // last
    subl    in_d, %eax                  // last - in
    addl    $5, %eax                    // 5 + last - in
    jmp     L69
.align 4,0x90
// stuffing the following 4 bytes to align the major loop to a 16-byte boundary to give the better performance
.byte 0
.byte 0
.byte 0
.byte 0
L_copy_direct_from_output:
    // dist <= out - beg : the whole match is in the output buffer.
    // from = out - dist; copy 3 bytes per iteration, then 0..2 tail bytes.
    mov     dist, %eax                  // dist
    movq    out, %rdx                   // out
    subq    %rax, %rdx                  // from = out - dist
    subl    $3, len                     // pre-decrement len by 3
                                        // do {
0:  movzbl  (%rdx), %eax                // *from
    movb    %al, (out)                  // *out
    movzbl  1(%rdx), %eax               // *from
    movb    %al, 1(out)                 // *out
    movzbl  2(%rdx), %eax               // *from
    movb    %al, 2(out)                 // *out
    addq    $3, out                     // out+=3
    addq    $3, %rdx                    // from+=3
    subl    $3, len                     // len-=3
    jge     0b                          // } while (len>=0)
    addl    $3, len                     // post-increment len by 3
    je      L_do_while_loop_check       // if len==0, branch to do_while_loop_check
    movzbl  (%rdx), %eax                // *from
    movb    %al, (out)                  // *out
    incq    out                         // out++
    cmpl    $2, len                     // len == 2 ?
    jne     L_do_while_loop_check       // if len==1, branch to do_while_loop_check
    movzbl  1(%rdx), %eax               // *from
    movb    %al, (out)                  // *out
    incq    out                         // out++
    jmp     L_do_while_loop_check       // branch to do_while_loop_check
.align 4,0x90
L_some_from_window:                     // from : from, out, %esi/%edx = op
                                        // do {
    movzbl  (from), %eax                // *from
    movb    %al, (out)                  // *out
    incq    from                        // from++
    incq    out                         // out++
    decl    %esi                        // --op
    jne     L_some_from_window          // } while (op)
    subl    %edx, len                   // len -= op   (edx still holds the original op)
    mov     dist, %eax                  // dist
    movq    out, from                   // out
    subq    %rax, from                  // from = out - dist
    jmp     L38                         // copy the rest from output

    .align 4,0x90
L_wrap_around_window:
    // write != 0 : edx = op
    cmpl    %edx, write                 // write vs op
    jae     L_contiguous_in_window      // if (write >= op) branch to contiguous in window
    movl    wsize_write, %eax           // wsize+write
    subl    %edx, %eax                  // wsize+write-op
    movq    window, from                // from = window - OFF
    addq    %rax, from                  // from += wsize+write-op
    subl    write, %edx                 // op -= write
    cmpl    %edx, len                   // len vs op
    jbe     L38                         // if (len<=op) branch to copy from output
    subl    %edx, len                   // len -= op
0:                                      // do {
    movzbl  (from), %eax                // *from
    movb    %al, (out)                  // *out
    incq    from                        // from++
    incq    out                         // out++
    decl    %edx                        // op--
    jne     0b                          // } while (op)
    movq    window, from                // from = window - OFF
    cmpl    len, write                  // write vs len
    jae     L38                         // if (write >= len) branch to copy from output
    movl    write, %esi                 // op = write
    subl    write, len                  // len -= op
1:                                      // do {
    movzbl  (from), %eax                // *from
    movb    %al, (out)                  // *out
    incq    from                        // from++
    incq    out                         // out++
    decl    %esi                        // op--
    jne     1b                          // } while (op)
    mov     dist, %eax                  // dist
    movq    out, from                   // out
    subq    %rax, from                  // from = out - dist
    jmp     L38                         // copy the rest from output

    .align 4,0x90
L_contiguous_in_window:
    movl    write, %eax                 // write
    subl    %edx, %eax                  // write - op
    movq    window, from                // from = window - OFF
    addq    %rax, from                  // from += write - op
    cmpl    %edx, len                   // len vs op
    jbe     L38                         // if (len <= op) branch to copy from output
    subl    %edx, len                   // len -= op
2:                                      // do {
    movzbl  (from), %eax                // *from
    movb    %al, (out)                  // *out
    incq    from                        // from++
    incq    out                         // out++
    decl    %edx                        // op--
    jne     2b                          // } while (op)
    mov     dist, %eax                  // dist
    movq    out, from                   // out
    subq    %rax, from                  // from = out - dist
    jmp     L38                         // copy the rest from output
.align 4,0x90
L_invalid_distance_code:
    // Error exit: message pointer to 48(strm), 27 to state->mode, common exit.
    leaq    LC1(%rip), %rdx
    movq    %rdx, 48(strm)
    movl    $27, (state)
    movl    out_d, %esi                 // L34 expects the low 32 bits of out in esi
    jmp     L34

L_invalid_distance_too_far_back:
    leaq    LC0(%rip), %rbx
    movq    %rbx, 48(strm)              // error message
    movl    $27, (state)                // state->mode = BAD
    // The INFLATE_STRICT dmax check branches here while esi still holds op,
    // so esi must be reloaded or L34 computes a wrong strm->avail_out.
    movl    out_d, %esi
    jmp     L34
#endif