# RC4 for x86_64: perlasm generator.
#
# Usage: perl <this script> <output>
#
# Builds the assembler source for RC4(), RC4_set_key() and RC4_options() in
# $code and pipes it through x86_64-xlate.pl, which receives <output> as its
# argument.
#
# Register operands inside the templates are written with a size marker,
# e.g. "$YY#b" (byte) or "$YY#d" (dword).  After interpolation this reads
# "%r12#b"; the substitution near the end of the script turns it into the
# real sub-register name "%r12b".  The markers therefore must survive any
# editing of this file: "#b" / "#w" / "#d" are NOT comments.

$output=shift;

# Locate the translator next to this script or in ../../perlasm/.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed from here on is fed to the translator.
open STDOUT,"| $^X $xlate $output" or die "can't call $xlate: $!";

# Arguments of RC4(), in declaration order (the function is declared with
# four arguments below).
$dat="%rdi";	# arg1: key schedule
$len="%rsi";	# arg2: number of bytes to process
$inp="%rdx";	# arg3: input pointer
$out="%rcx";	# arg4: output pointer

# Two-element register sets that are "rotated" after every unrolled step so
# that the value pre-loaded for step N+1 becomes the current value.
@XX=("%r8","%r10");	# x index: current, next
@TX=("%r9","%r11");	# S[x]:    current, next
$YY="%r12";		# y index
$TY="%r13";		# S[y], then the key-stream byte index/value

# Key schedule layout as used below (after $dat is advanced by 8):
#   -8($dat)  x            -4($dat)  y
#   0($dat)   S-box, either 256 dwords ("int") or 256 bytes ("char")
#   256($dat) set to -1 by RC4_set_key to mark the byte layout
#   260($dat) non-zero selects the 1x byte loop instead of the 8x one
$code=<<___;
.text

.globl	RC4
.type	RC4,\@function,4
.align	16
RC4:	or	$len,$len
	jne	.Lentry
	ret
.Lentry:
	push	%r12
	push	%r13

	add	\$8,$dat
	movl	-8($dat),$XX[0]#d
	movl	-4($dat),$YY#d
	cmpl	\$-1,256($dat)
	je	.LRC4_CHAR
	inc	$XX[0]#b
	movl	($dat,$XX[0],4),$TX[0]#d
	test	\$-8,$len
	jz	.Lloop1
	jmp	.Lloop8
.align	16
.Lloop8:
___
# Dword layout, 8 bytes per iteration.  Each step swaps S[x] and S[y] and
# collects one key-stream byte in %al; %rax is rotated by 8 bits per step so
# that eight bytes are accumulated before a single 64-bit xor.  The next
# S[x+1] is loaded before the swap is stored, hence the cmove fix-up for the
# case x+1 == y.
for ($i=0;$i<8;$i++) {
$code.=<<___;
	add	$TX[0]#b,$YY#b
	mov	$XX[0],$XX[1]
	movl	($dat,$YY,4),$TY#d
	ror	\$8,%rax
	inc	$XX[1]#b
	movl	($dat,$XX[1],4),$TX[1]#d
	cmp	$XX[1],$YY
	movl	$TX[0]#d,($dat,$YY,4)
	cmove	$TX[0],$TX[1]
	movl	$TY#d,($dat,$XX[0],4)
	add	$TX[0]#b,$TY#b
	movb	($dat,$TY,4),%al
___
push(@TX,shift(@TX)); push(@XX,shift(@XX));	# "rotate" registers
}
$code.=<<___;
	ror	\$8,%rax
	sub	\$8,$len

	xor	($inp),%rax
	add	\$8,$inp
	mov	%rax,($out)
	add	\$8,$out

	test	\$-8,$len
	jnz	.Lloop8
	cmp	\$0,$len
	jne	.Lloop1
___
# Common exit (x was pre-incremented, so it is stepped back before being
# stored) followed by the byte-at-a-time loop for the dword layout.
$code.=<<___;
.Lexit:
	sub	\$1,$XX[0]#b
	movl	$XX[0]#d,-8($dat)
	movl	$YY#d,-4($dat)

	pop	%r13
	pop	%r12
	ret
.align	16
.Lloop1:
	add	$TX[0]#b,$YY#b
	movl	($dat,$YY,4),$TY#d
	movl	$TX[0]#d,($dat,$YY,4)
	movl	$TY#d,($dat,$XX[0],4)
	add	$TY#b,$TX[0]#b
	inc	$XX[0]#b
	movl	($dat,$TX[0],4),$TY#d
	movl	($dat,$XX[0],4),$TX[0]#d
	xorb	($inp),$TY#b
	inc	$inp
	movb	$TY#b,($out)
	inc	$out
	dec	$len
	jnz	.Lloop1
	jmp	.Lexit

.align	16
.LRC4_CHAR:
	add	\$1,$XX[0]#b
	movzb	($dat,$XX[0]),$TX[0]#d
	test	\$-8,$len
	jz	.Lcloop1
	cmpl	\$0,260($dat)
	jnz	.Lcloop1
	push	%rbx
	jmp	.Lcloop8
.align	16
.Lcloop8:
	mov	($inp),%eax
	mov	4($inp),%ebx
___
# Byte layout, 8 bytes per iteration, done as two groups of four using
# 32-bit accumulators (%eax, then %ebx).  A branch is used here instead of
# cmove for the x+1 == y fix-up.
for ($i=0;$i<4;$i++) {
$code.=<<___;
	add	$TX[0]#b,$YY#b
	lea	1($XX[0]),$XX[1]
	movzb	($dat,$YY),$TY#d
	movzb	$XX[1]#b,$XX[1]#d
	movzb	($dat,$XX[1]),$TX[1]#d
	movb	$TX[0]#b,($dat,$YY)
	cmp	$XX[1],$YY
	movb	$TY#b,($dat,$XX[0])
	jne	.Lcmov$i
	mov	$TX[0],$TX[1]
.Lcmov$i:
	add	$TX[0]#b,$TY#b
	xor	($dat,$TY),%al
	ror	\$8,%eax
___
push(@TX,shift(@TX)); push(@XX,shift(@XX));	# "rotate" registers
}
for ($i=4;$i<8;$i++) {
$code.=<<___;
	add	$TX[0]#b,$YY#b
	lea	1($XX[0]),$XX[1]
	movzb	($dat,$YY),$TY#d
	movzb	$XX[1]#b,$XX[1]#d
	movzb	($dat,$XX[1]),$TX[1]#d
	movb	$TX[0]#b,($dat,$YY)
	cmp	$XX[1],$YY
	movb	$TY#b,($dat,$XX[0])
	jne	.Lcmov$i
	mov	$TX[0],$TX[1]
.Lcmov$i:
	add	$TX[0]#b,$TY#b
	xor	($dat,$TY),%bl
	ror	\$8,%ebx
___
push(@TX,shift(@TX)); push(@XX,shift(@XX));	# "rotate" registers
}
$code.=<<___;
	lea	-8($len),$len
	mov	%eax,($out)
	lea	8($inp),$inp
	mov	%ebx,4($out)
	lea	8($out),$out

	test	\$-8,$len
	jnz	.Lcloop8
	pop	%rbx
	cmp	\$0,$len
	jne	.Lcloop1
	jmp	.Lexit
___
# Byte layout, one byte per iteration (tail of the 8x loop, or the whole
# input when 260($dat) is non-zero).
$code.=<<___;
.align	16
.Lcloop1:
	add	$TX[0]#b,$YY#b
	movzb	($dat,$YY),$TY#d
	movb	$TX[0]#b,($dat,$YY)
	movb	$TY#b,($dat,$XX[0])
	add	$TX[0]#b,$TY#b
	add	\$1,$XX[0]#b
	movzb	$TY#b,$TY#d
	movzb	$XX[0]#b,$XX[0]#d
	movzb	($dat,$TY),$TY#d
	movzb	($dat,$XX[0]),$TX[0]#d
	xorb	($inp),$TY#b
	lea	1($inp),$inp
	movb	$TY#b,($out)
	lea	1($out),$out
	sub	\$1,$len
	jnz	.Lcloop1
	jmp	.Lexit
.size	RC4,.-RC4
___

# RC4_set_key(key schedule, key length, key bytes): the registers named
# $dat/$len/$inp above are reused for its three arguments.  Bit 20 of
# OPENSSL_ia32cap_P selects the byte layout; when it is set, bit 30 is
# recorded at 260($dat) and later selects the 1x loop in RC4().
# RC4_options() reports the same choice as a string.
$idx="%r8";
$ido="%r9";

$code.=<<___;
.extern	OPENSSL_ia32cap_P
.globl	RC4_set_key
.type	RC4_set_key,\@function,3
.align	16
RC4_set_key:
	lea	8($dat),$dat
	lea	($inp,$len),$inp
	neg	$len
	mov	$len,%rcx
	xor	%eax,%eax
	xor	$ido,$ido
	xor	%r10,%r10
	xor	%r11,%r11

	mov	OPENSSL_ia32cap_P(%rip),$idx#d
	bt	\$20,$idx#d
	jnc	.Lw1stloop
	bt	\$30,$idx#d
	setc	$ido#b
	mov	$ido#d,260($dat)
	jmp	.Lc1stloop

.align	16
.Lw1stloop:
	mov	%eax,($dat,%rax,4)
	add	\$1,%al
	jnc	.Lw1stloop

	xor	$ido,$ido
	xor	$idx,$idx
.align	16
.Lw2ndloop:
	mov	($dat,$ido,4),%r10d
	add	($inp,$len,1),$idx#b
	add	%r10b,$idx#b
	add	\$1,$len
	mov	($dat,$idx,4),%r11d
	cmovz	%rcx,$len
	mov	%r10d,($dat,$idx,4)
	mov	%r11d,($dat,$ido,4)
	add	\$1,$ido#b
	jnc	.Lw2ndloop
	jmp	.Lexit_key

.align	16
.Lc1stloop:
	mov	%al,($dat,%rax)
	add	\$1,%al
	jnc	.Lc1stloop

	xor	$ido,$ido
	xor	$idx,$idx
.align	16
.Lc2ndloop:
	mov	($dat,$ido),%r10b
	add	($inp,$len),$idx#b
	add	%r10b,$idx#b
	add	\$1,$len
	mov	($dat,$idx),%r11b
	jnz	.Lcnowrap
	mov	%rcx,$len
.Lcnowrap:
	mov	%r10b,($dat,$idx)
	mov	%r11b,($dat,$ido)
	add	\$1,$ido#b
	jnc	.Lc2ndloop
	movl	\$-1,256($dat)

.align	16
.Lexit_key:
	xor	%eax,%eax
	mov	%eax,-8($dat)
	mov	%eax,-4($dat)
	ret
.size	RC4_set_key,.-RC4_set_key

.globl	RC4_options
.type	RC4_options,\@function,0
.align	16
RC4_options:
	.picmeup %rax
	lea	.Lopts-.(%rax),%rax
	mov	OPENSSL_ia32cap_P(%rip),%edx
	bt	\$20,%edx
	jnc	.Ldone
	add	\$12,%rax
	bt	\$30,%edx
	jnc	.Ldone
	add	\$13,%rax
.Ldone:
	ret
.align	64
.Lopts:
.asciz	"rc4(8x,int)"
.asciz	"rc4(8x,char)"
.asciz	"rc4(1x,char)"
.asciz	"RC4 for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
.align	64
.size	RC4_options,.-RC4_options
___

# Resolve the size markers: "%r12#b" -> "%r12b", "%r8#d" -> "%r8d", ...
$code =~ s/#([bwd])/$1/gm;

# When FIPSCANLIB is set to a non-empty value the key setup routine is
# emitted under the name private_RC4_set_key.
$code =~ s/RC4_set_key/private_RC4_set_key/g if ($ENV{FIPSCANLIB} ne "");

print $code;

# Closing the pipe waits for the translator; report its failure instead of
# silently leaving a truncated output file behind.
close STDOUT or die "error closing pipe to $xlate: $! (exit status $?)";