# Driver for the PowerPC bignum assembly generator.
#
# Usage: <script> <output-file> [<anything>]
#
#   <output-file>  Must contain "32.s" or "64.s"; that choice selects the
#                  word size and the instruction mnemonics interpolated into
#                  the template returned by data().  The same name is also
#                  searched for "linux", "aix" or "osx" to pick the
#                  symbol-naming flavour (anything else is treated as BSD).
#   <anything>     When a second argument is supplied the generated text is
#                  left on the current STDOUT instead of being written to
#                  <output-file>.
#
# All settings below are deliberately package globals: data() interpolates
# them into its heredoc and do_linux() consults $BITS.

$opf = shift;
die "usage: $0 <flavour>{32,64}.s [keep-stdout]\n" unless defined $opf;

if ($opf =~ /32\.s/) {
    $BITS  = 32;
    $BNSZ  = $BITS/8;           # size of one bignum word in bytes
    $ISA   = "\"ppc\"";         # operand of the .machine directive

    $LD    = "lwz";             # load
    $LDU   = "lwzu";            # load with update
    $ST    = "stw";             # store
    $STU   = "stwu";            # store with update
    $UMULL = "mullw";           # unsigned multiply, low half
    $UMULH = "mulhwu";          # unsigned multiply, high half
    $UDIV  = "divwu";           # unsigned divide
    $UCMPI = "cmplwi";          # unsigned compare with immediate
    $UCMP  = "cmplw";           # unsigned compare
    $CNTLZ = "cntlzw";          # count leading zeros
    $SHL   = "slw";             # shift left
    $SHR   = "srw";             # shift right
    $SHRI  = "srwi";            # shift right by immediate
    $SHLI  = "slwi";            # shift left by immediate
    $CLRU  = "clrlwi";          # clear upper bits
    $INSR  = "insrwi";          # insert from right
    $ROTL  = "rotlwi";          # rotate left by immediate
    $TR    = "tw";              # conditional trap
}
elsif ($opf =~ /64\.s/) {
    $BITS  = 64;
    $BNSZ  = $BITS/8;           # size of one bignum word in bytes
    $ISA   = "\"ppc64\"";       # operand of the .machine directive

    $LD    = "ld";              # load
    $LDU   = "ldu";             # load with update
    $ST    = "std";             # store
    $STU   = "stdu";            # store with update
    $UMULL = "mulld";           # unsigned multiply, low half
    $UMULH = "mulhdu";          # unsigned multiply, high half
    $UDIV  = "divdu";           # unsigned divide
    $UCMPI = "cmpldi";          # unsigned compare with immediate
    $UCMP  = "cmpld";           # unsigned compare
    $CNTLZ = "cntlzd";          # count leading zeros
    $SHL   = "sld";             # shift left
    $SHR   = "srd";             # shift right
    $SHRI  = "srdi";            # shift right by immediate
    $SHLI  = "sldi";            # shift left by immediate
    $CLRU  = "clrldi";          # clear upper bits
    $INSR  = "insrdi";          # insert from right
    $ROTL  = "rotldi";          # rotate left by immediate
    $TR    = "td";              # conditional trap
}
else {
    die "nonsense $opf";
}

# Redirect STDOUT to the output file unless a second argument was given.
# Three-argument open keeps the file name from being parsed as a mode.
unless (defined shift) {
    open STDOUT, '>', $opf or die "can't open $opf: $!";
}

# Routine names whose ".name" labels the do_* subs rewrite per flavour.
my @items = ("bn_sqr_comba4",
             "bn_sqr_comba8",
             "bn_mul_comba4",
             "bn_mul_comba8",
             "bn_sub_words",
             "bn_add_words",
             "bn_div_words",
             "bn_sqr_words",
             "bn_mul_words",
             "bn_mul_add_words");

# Pick the flavour from the output file name; BSD is the fallback.
if    ($opf =~ /linux/) { do_linux(); }
elsif ($opf =~ /aix/)   { do_aix();   }
elsif ($opf =~ /osx/)   { do_osx();   }
else                    { do_bsd();   }
# Print the assembly for Linux targets.
#
# The template from data() labels every routine ".name".  When $BITS is 64
# each ".name:" label is expanded into an ".opd" section entry named "name"
# (a .quad triple referring to ".name" and the TOC base) followed by the
# ".name:" code label itself.  Otherwise the leading dot is simply dropped
# from every reference to the routine.  In both cases every "Lppcasm_"
# label then gains a "." prefix before the text is printed.
sub do_linux {
    my $asm = data();

    for my $sym (@items) {
        if ($BITS == 64) {
            # Text that replaces the bare ".name:" label; it starts with a
            # newline and ends with the original label.
            my $descriptor = join "\n",
                "",
                "\t.section\t\".opd\",\"aw\"",
                "\t.align\t3",
                "\t.globl\t${sym}",
                "${sym}:",
                "\t.quad\t.${sym},.TOC.\@tocbase,0",
                "\t.size\t${sym},24",
                "\t.previous",
                "",
                "\t.type\t.${sym},\@function",
                "\t.globl\t.${sym}",
                ".${sym}:";
            $asm =~ s/\.${sym}:/$descriptor/g;
        }
        else {
            $asm =~ s/\.${sym}/${sym}/g;
        }
    }

    $asm =~ s/Lppcasm_/.Lppcasm_/gm;
    print $asm;
}
# Print the assembly for AIX targets: the template returned by data() is
# emitted exactly as it is, with no symbol rewriting.
sub do_aix {
    my @template = data();
    print @template;
}
# Print the assembly for OS X targets.
#
# Three rewrites are applied to the template returned by data():
#   1. every ".name" routine reference becomes "_name";
#   2. each ".machine ..." directive (through end of line) becomes ".text";
#   3. every '#' becomes ';'.  NOTE(review): this statement was truncated in
#      the file (only "s/\" survived, which also swallowed the print below
#      and made the script uncompilable); it is restored on the assumption
#      that it converted '#' comments to the ';' comment character —
#      confirm against the target assembler.
sub do_osx {
    my $asm = data();

    for my $sym (@items) {
        $asm =~ s/\.$sym/_$sym/g;
    }

    $asm =~ s/\.machine.*/.text/g;
    $asm =~ s/\#/;/g;

    print $asm;
}
# Print the assembly for the fallback (BSD) flavour: every ".name" routine
# reference in the template from data() is rewritten to "_name", and the
# result is printed.
sub do_bsd {
    my $asm = data();

    for my $sym (@items) {
        $asm =~ s{\.$sym}{_$sym}g;
    }

    print $asm;
}
# Build and return the assembly source for all bignum routines.
#
# The text is an interpolating heredoc.  It relies on these package globals
# being set by the caller beforehand:
#   $BITS, $BNSZ, $ISA and the mnemonic variables $LD $LDU $ST $STU $UMULL
#   $UMULH $UDIV $UCMPI $UCMP $CNTLZ $SHL $SHR $SHRI $SHLI $CLRU $INSR
#   $ROTL $TR.
#
# After interpolation two passes are made over the text:
#   * every `expr` (backtick-quoted) span is replaced by the value of
#     evaluating expr as Perl, e.g. `3*4` becomes 12 (used for word offsets
#     and half-word shift counts);
#   * a "cmplw X,Y..." at the start of a line is rewritten to
#     "cmpl X,0,Y...".
#
# Routines, as far as the code itself shows:
#   .bn_sqr_comba4 / .bn_sqr_comba8   store 8 / 16 words at (r3) holding the
#                                     square of the 4 / 8 words at (r4)
#   .bn_mul_comba4 / .bn_mul_comba8   store 8 / 16 words at (r3) holding the
#                                     product of the words at (r4) and (r5)
#   .bn_sub_words / .bn_add_words     (r3)[i] = (r4)[i] -/+ (r5)[i] for r6
#                                     words; r3 returns the borrow / carry
#   .bn_div_words                     divides using r3, r4 and divisor r5;
#                                     returns -1 in r3 when r5 is zero
#   .bn_sqr_words                     for r5 words at (r4), stores the low
#                                     and high product halves at (r3)
#   .bn_mul_words                     (r3)[i] = (r4)[i]*r6 + carry for r5
#                                     words; r3 returns the final carry
#   .bn_mul_add_words                 (r3)[i] += (r4)[i]*r6 + carry for r5
#                                     words; r3 returns the final carry
#
# Every assembler statement and every label sits on its own line.  Several
# statements had been run together on one line, which assemblers reject.
#
# In .bn_mul_comba4 the a[3]*b[2] term loads b[2] through r5 like every other
# b operand; it used to load through r4 and so multiplied a[3] by a[2].
sub data {
    my $data = <<EOF;
	.set	r0,0
	.set	SP,1
	.set	RTOC,2
	.set	r3,3
	.set	r4,4
	.set	r5,5
	.set	r6,6
	.set	r7,7
	.set	r8,8
	.set	r9,9
	.set	r10,10
	.set	r11,11
	.set	r12,12
	.set	r13,13
	.set	BO_IF_NOT,4
	.set	BO_IF,12
	.set	BO_dCTR_NZERO,16
	.set	BO_dCTR_ZERO,18
	.set	BO_ALWAYS,20
	.set	CR0_LT,0;
	.set	CR0_GT,1;
	.set	CR0_EQ,2
	.set	CR1_FX,4;
	.set	CR1_FEX,5;
	.set	CR1_VX,6
	.set	LR,8
	.globl	.bn_sqr_comba4
	.globl	.bn_sqr_comba8
	.globl	.bn_mul_comba4
	.globl	.bn_mul_comba8
	.globl	.bn_sub_words
	.globl	.bn_add_words
	.globl	.bn_div_words
	.globl	.bn_sqr_words
	.globl	.bn_mul_words
	.globl	.bn_mul_add_words
	.machine	$ISA
	.align	4

.bn_sqr_comba4:
	xor	r0,r0,r0
	$LD	r5,`0*$BNSZ`(r4)
	$UMULL	r9,r5,r5
	$UMULH	r10,r5,r5
	$ST	r9,`0*$BNSZ`(r3)
	$LD	r6,`1*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r7,r7,r7
	adde	r8,r8,r8
	addze	r9,r0
	addc	r10,r7,r10
	addze	r11,r8
	addze	r9,r9
	$ST	r10,`1*$BNSZ`(r3)
	$UMULL	r7,r6,r6
	$UMULH	r8,r6,r6
	addc	r11,r7,r11
	adde	r9,r8,r9
	addze	r10,r0
	$LD	r6,`2*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r7,r7,r7
	adde	r8,r8,r8
	addze	r10,r10
	addc	r11,r7,r11
	adde	r9,r8,r9
	addze	r10,r10
	$ST	r11,`2*$BNSZ`(r3)
	$LD	r6,`3*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r7,r7,r7
	adde	r8,r8,r8
	addze	r11,r0
	addc	r9,r7,r9
	adde	r10,r8,r10
	addze	r11,r11
	$LD	r5,`1*$BNSZ`(r4)
	$LD	r6,`2*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r7,r7,r7
	adde	r8,r8,r8
	addze	r11,r11
	addc	r9,r7,r9
	adde	r10,r8,r10
	addze	r11,r11
	$ST	r9,`3*$BNSZ`(r3)
	$UMULL	r7,r6,r6
	$UMULH	r8,r6,r6
	addc	r10,r7,r10
	adde	r11,r8,r11
	addze	r9,r0
	$LD	r6,`3*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r7,r7,r7
	adde	r8,r8,r8
	addze	r9,r9
	addc	r10,r7,r10
	adde	r11,r8,r11
	addze	r9,r9
	$ST	r10,`4*$BNSZ`(r3)
	$LD	r5,`2*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r7,r7,r7
	adde	r8,r8,r8
	addze	r10,r0
	addc	r11,r7,r11
	adde	r9,r8,r9
	addze	r10,r10
	$ST	r11,`5*$BNSZ`(r3)
	$UMULL	r7,r6,r6
	$UMULH	r8,r6,r6
	addc	r9,r7,r9
	adde	r10,r8,r10
	$ST	r9,`6*$BNSZ`(r3)
	$ST	r10,`7*$BNSZ`(r3)
	bclr	BO_ALWAYS,CR0_LT
	.long	0x00000000
	.align	4

.bn_sqr_comba8:
	xor	r0,r0,r0
	$LD	r5,`0*$BNSZ`(r4)
	$UMULL	r9,r5,r5
	$UMULH	r10,r5,r5
	$ST	r9,`0*$BNSZ`(r3)
	$LD	r6,`1*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r10,r7,r10
	adde	r11,r8,r0
	addze	r9,r0
	addc	r10,r7,r10
	adde	r11,r8,r11
	addze	r9,r9
	$ST	r10,`1*$BNSZ`(r3)
	$UMULL	r7,r6,r6
	$UMULH	r8,r6,r6
	addc	r11,r7,r11
	adde	r9,r8,r9
	addze	r10,r0
	$LD	r6,`2*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r11,r7,r11
	adde	r9,r8,r9
	addze	r10,r10
	addc	r11,r7,r11
	adde	r9,r8,r9
	addze	r10,r10
	$ST	r11,`2*$BNSZ`(r3)
	$LD	r6,`3*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r9,r7,r9
	adde	r10,r8,r10
	addze	r11,r0
	addc	r9,r7,r9
	adde	r10,r8,r10
	addze	r11,r11
	$LD	r5,`1*$BNSZ`(r4)
	$LD	r6,`2*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r9,r7,r9
	adde	r10,r8,r10
	addze	r11,r11
	addc	r9,r7,r9
	adde	r10,r8,r10
	addze	r11,r11
	$ST	r9,`3*$BNSZ`(r3)
	$UMULL	r7,r6,r6
	$UMULH	r8,r6,r6
	addc	r10,r7,r10
	adde	r11,r8,r11
	addze	r9,r0
	$LD	r6,`3*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r10,r7,r10
	adde	r11,r8,r11
	addze	r9,r9
	addc	r10,r7,r10
	adde	r11,r8,r11
	addze	r9,r9
	$LD	r5,`0*$BNSZ`(r4)
	$LD	r6,`4*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r10,r7,r10
	adde	r11,r8,r11
	addze	r9,r9
	addc	r10,r7,r10
	adde	r11,r8,r11
	addze	r9,r9
	$ST	r10,`4*$BNSZ`(r3)
	$LD	r6,`5*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r11,r7,r11
	adde	r9,r8,r9
	addze	r10,r0
	addc	r11,r7,r11
	adde	r9,r8,r9
	addze	r10,r10
	$LD	r5,`1*$BNSZ`(r4)
	$LD	r6,`4*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r11,r7,r11
	adde	r9,r8,r9
	addze	r10,r10
	addc	r11,r7,r11
	adde	r9,r8,r9
	addze	r10,r10
	$LD	r5,`2*$BNSZ`(r4)
	$LD	r6,`3*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r11,r7,r11
	adde	r9,r8,r9
	addze	r10,r10
	addc	r11,r7,r11
	adde	r9,r8,r9
	addze	r10,r10
	$ST	r11,`5*$BNSZ`(r3)
	$UMULL	r7,r6,r6
	$UMULH	r8,r6,r6
	addc	r9,r7,r9
	adde	r10,r8,r10
	addze	r11,r0
	$LD	r6,`4*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r9,r7,r9
	adde	r10,r8,r10
	addze	r11,r11
	addc	r9,r7,r9
	adde	r10,r8,r10
	addze	r11,r11
	$LD	r5,`1*$BNSZ`(r4)
	$LD	r6,`5*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r9,r7,r9
	adde	r10,r8,r10
	addze	r11,r11
	addc	r9,r7,r9
	adde	r10,r8,r10
	addze	r11,r11
	$LD	r5,`0*$BNSZ`(r4)
	$LD	r6,`6*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r9,r7,r9
	adde	r10,r8,r10
	addze	r11,r11
	addc	r9,r7,r9
	adde	r10,r8,r10
	addze	r11,r11
	$ST	r9,`6*$BNSZ`(r3)
	$LD	r6,`7*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r10,r7,r10
	adde	r11,r8,r11
	addze	r9,r0
	addc	r10,r7,r10
	adde	r11,r8,r11
	addze	r9,r9
	$LD	r5,`1*$BNSZ`(r4)
	$LD	r6,`6*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r10,r7,r10
	adde	r11,r8,r11
	addze	r9,r9
	addc	r10,r7,r10
	adde	r11,r8,r11
	addze	r9,r9
	$LD	r5,`2*$BNSZ`(r4)
	$LD	r6,`5*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r10,r7,r10
	adde	r11,r8,r11
	addze	r9,r9
	addc	r10,r7,r10
	adde	r11,r8,r11
	addze	r9,r9
	$LD	r5,`3*$BNSZ`(r4)
	$LD	r6,`4*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r10,r7,r10
	adde	r11,r8,r11
	addze	r9,r9
	addc	r10,r7,r10
	adde	r11,r8,r11
	addze	r9,r9
	$ST	r10,`7*$BNSZ`(r3)
	$UMULL	r7,r6,r6
	$UMULH	r8,r6,r6
	addc	r11,r7,r11
	adde	r9,r8,r9
	addze	r10,r0
	$LD	r6,`5*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r11,r7,r11
	adde	r9,r8,r9
	addze	r10,r10
	addc	r11,r7,r11
	adde	r9,r8,r9
	addze	r10,r10
	$LD	r5,`2*$BNSZ`(r4)
	$LD	r6,`6*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r11,r7,r11
	adde	r9,r8,r9
	addze	r10,r10
	addc	r11,r7,r11
	adde	r9,r8,r9
	addze	r10,r10
	$LD	r5,`1*$BNSZ`(r4)
	$LD	r6,`7*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r11,r7,r11
	adde	r9,r8,r9
	addze	r10,r10
	addc	r11,r7,r11
	adde	r9,r8,r9
	addze	r10,r10
	$ST	r11,`8*$BNSZ`(r3)
	$LD	r5,`2*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r9,r7,r9
	adde	r10,r8,r10
	addze	r11,r0
	addc	r9,r7,r9
	adde	r10,r8,r10
	addze	r11,r11
	$LD	r5,`3*$BNSZ`(r4)
	$LD	r6,`6*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r9,r7,r9
	adde	r10,r8,r10
	addze	r11,r11
	addc	r9,r7,r9
	adde	r10,r8,r10
	addze	r11,r11
	$LD	r5,`4*$BNSZ`(r4)
	$LD	r6,`5*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r9,r7,r9
	adde	r10,r8,r10
	addze	r11,r11
	addc	r9,r7,r9
	adde	r10,r8,r10
	addze	r11,r11
	$ST	r9,`9*$BNSZ`(r3)
	$UMULL	r7,r6,r6
	$UMULH	r8,r6,r6
	addc	r10,r7,r10
	adde	r11,r8,r11
	addze	r9,r0
	$LD	r6,`6*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r10,r7,r10
	adde	r11,r8,r11
	addze	r9,r9
	addc	r10,r7,r10
	adde	r11,r8,r11
	addze	r9,r9
	$LD	r5,`3*$BNSZ`(r4)
	$LD	r6,`7*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r10,r7,r10
	adde	r11,r8,r11
	addze	r9,r9
	addc	r10,r7,r10
	adde	r11,r8,r11
	addze	r9,r9
	$ST	r10,`10*$BNSZ`(r3)
	$LD	r5,`4*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r11,r7,r11
	adde	r9,r8,r9
	addze	r10,r0
	addc	r11,r7,r11
	adde	r9,r8,r9
	addze	r10,r10
	$LD	r5,`5*$BNSZ`(r4)
	$LD	r6,`6*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r11,r7,r11
	adde	r9,r8,r9
	addze	r10,r10
	addc	r11,r7,r11
	adde	r9,r8,r9
	addze	r10,r10
	$ST	r11,`11*$BNSZ`(r3)
	$UMULL	r7,r6,r6
	$UMULH	r8,r6,r6
	addc	r9,r7,r9
	adde	r10,r8,r10
	addze	r11,r0
	$LD	r6,`7*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r9,r7,r9
	adde	r10,r8,r10
	addze	r11,r11
	addc	r9,r7,r9
	adde	r10,r8,r10
	addze	r11,r11
	$ST	r9,`12*$BNSZ`(r3)
	$LD	r5,`6*$BNSZ`(r4)
	$UMULL	r7,r5,r6
	$UMULH	r8,r5,r6
	addc	r10,r7,r10
	adde	r11,r8,r11
	addze	r9,r0
	addc	r10,r7,r10
	adde	r11,r8,r11
	addze	r9,r9
	$ST	r10,`13*$BNSZ`(r3)
	$UMULL	r7,r6,r6
	$UMULH	r8,r6,r6
	addc	r11,r7,r11
	adde	r9,r8,r9
	$ST	r11,`14*$BNSZ`(r3)
	$ST	r9,`15*$BNSZ`(r3)
	bclr	BO_ALWAYS,CR0_LT
	.long	0x00000000
	.align	4

.bn_mul_comba4:
	xor	r0,r0,r0
	$LD	r6,`0*$BNSZ`(r4)
	$LD	r7,`0*$BNSZ`(r5)
	$UMULL	r10,r6,r7
	$UMULH	r11,r6,r7
	$ST	r10,`0*$BNSZ`(r3)
	$LD	r7,`1*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r8,r11
	adde	r12,r9,r0
	addze	r10,r0
	$LD	r6,`1*$BNSZ`(r4)
	$LD	r7,`0*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r8,r11
	adde	r12,r9,r12
	addze	r10,r10
	$ST	r11,`1*$BNSZ`(r3)
	$LD	r6,`2*$BNSZ`(r4)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r8,r12
	adde	r10,r9,r10
	addze	r11,r0
	$LD	r6,`1*$BNSZ`(r4)
	$LD	r7,`1*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r8,r12
	adde	r10,r9,r10
	addze	r11,r11
	$LD	r6,`0*$BNSZ`(r4)
	$LD	r7,`2*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r8,r12
	adde	r10,r9,r10
	addze	r11,r11
	$ST	r12,`2*$BNSZ`(r3)
	$LD	r7,`3*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r8,r10
	adde	r11,r9,r11
	addze	r12,r0
	$LD	r6,`1*$BNSZ`(r4)
	$LD	r7,`2*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r8,r10
	adde	r11,r9,r11
	addze	r12,r12
	$LD	r6,`2*$BNSZ`(r4)
	$LD	r7,`1*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r8,r10
	adde	r11,r9,r11
	addze	r12,r12
	$LD	r6,`3*$BNSZ`(r4)
	$LD	r7,`0*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r8,r10
	adde	r11,r9,r11
	addze	r12,r12
	$ST	r10,`3*$BNSZ`(r3)
	$LD	r7,`1*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r8,r11
	adde	r12,r9,r12
	addze	r10,r0
	$LD	r6,`2*$BNSZ`(r4)
	$LD	r7,`2*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r8,r11
	adde	r12,r9,r12
	addze	r10,r10
	$LD	r6,`1*$BNSZ`(r4)
	$LD	r7,`3*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r8,r11
	adde	r12,r9,r12
	addze	r10,r10
	$ST	r11,`4*$BNSZ`(r3)
	$LD	r6,`2*$BNSZ`(r4)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r8,r12
	adde	r10,r9,r10
	addze	r11,r0
	$LD	r6,`3*$BNSZ`(r4)
	$LD	r7,`2*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r8,r12
	adde	r10,r9,r10
	addze	r11,r11
	$ST	r12,`5*$BNSZ`(r3)
	$LD	r7,`3*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r8,r10
	adde	r11,r9,r11
	$ST	r10,`6*$BNSZ`(r3)
	$ST	r11,`7*$BNSZ`(r3)
	bclr	BO_ALWAYS,CR0_LT
	.long	0x00000000
	.align	4

.bn_mul_comba8:
	xor	r0,r0,r0
	$LD	r6,`0*$BNSZ`(r4)
	$LD	r7,`0*$BNSZ`(r5)
	$UMULL	r10,r6,r7
	$UMULH	r11,r6,r7
	$ST	r10,`0*$BNSZ`(r3)
	$LD	r7,`1*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r11,r8
	addze	r12,r9
	addze	r10,r0
	$LD	r6,`1*$BNSZ`(r4)
	$LD	r7,`0*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r11,r8
	adde	r12,r12,r9
	addze	r10,r10
	$ST	r11,`1*$BNSZ`(r3)
	$LD	r6,`2*$BNSZ`(r4)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r12,r8
	adde	r10,r10,r9
	addze	r11,r0
	$LD	r6,`1*$BNSZ`(r4)
	$LD	r7,`1*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r12,r8
	adde	r10,r10,r9
	addze	r11,r11
	$LD	r6,`0*$BNSZ`(r4)
	$LD	r7,`2*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r12,r8
	adde	r10,r10,r9
	addze	r11,r11
	$ST	r12,`2*$BNSZ`(r3)
	$LD	r7,`3*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r10,r8
	adde	r11,r11,r9
	addze	r12,r0
	$LD	r6,`1*$BNSZ`(r4)
	$LD	r7,`2*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r10,r8
	adde	r11,r11,r9
	addze	r12,r12
	$LD	r6,`2*$BNSZ`(r4)
	$LD	r7,`1*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r10,r8
	adde	r11,r11,r9
	addze	r12,r12
	$LD	r6,`3*$BNSZ`(r4)
	$LD	r7,`0*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r10,r8
	adde	r11,r11,r9
	addze	r12,r12
	$ST	r10,`3*$BNSZ`(r3)
	$LD	r6,`4*$BNSZ`(r4)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r11,r8
	adde	r12,r12,r9
	addze	r10,r0
	$LD	r6,`3*$BNSZ`(r4)
	$LD	r7,`1*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r11,r8
	adde	r12,r12,r9
	addze	r10,r10
	$LD	r6,`2*$BNSZ`(r4)
	$LD	r7,`2*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r11,r8
	adde	r12,r12,r9
	addze	r10,r10
	$LD	r6,`1*$BNSZ`(r4)
	$LD	r7,`3*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r11,r8
	adde	r12,r12,r9
	addze	r10,r10
	$LD	r6,`0*$BNSZ`(r4)
	$LD	r7,`4*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r11,r8
	adde	r12,r12,r9
	addze	r10,r10
	$ST	r11,`4*$BNSZ`(r3)
	$LD	r7,`5*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r12,r8
	adde	r10,r10,r9
	addze	r11,r0
	$LD	r6,`1*$BNSZ`(r4)
	$LD	r7,`4*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r12,r8
	adde	r10,r10,r9
	addze	r11,r11
	$LD	r6,`2*$BNSZ`(r4)
	$LD	r7,`3*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r12,r8
	adde	r10,r10,r9
	addze	r11,r11
	$LD	r6,`3*$BNSZ`(r4)
	$LD	r7,`2*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r12,r8
	adde	r10,r10,r9
	addze	r11,r11
	$LD	r6,`4*$BNSZ`(r4)
	$LD	r7,`1*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r12,r8
	adde	r10,r10,r9
	addze	r11,r11
	$LD	r6,`5*$BNSZ`(r4)
	$LD	r7,`0*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r12,r8
	adde	r10,r10,r9
	addze	r11,r11
	$ST	r12,`5*$BNSZ`(r3)
	$LD	r6,`6*$BNSZ`(r4)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r10,r8
	adde	r11,r11,r9
	addze	r12,r0
	$LD	r6,`5*$BNSZ`(r4)
	$LD	r7,`1*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r10,r8
	adde	r11,r11,r9
	addze	r12,r12
	$LD	r6,`4*$BNSZ`(r4)
	$LD	r7,`2*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r10,r8
	adde	r11,r11,r9
	addze	r12,r12
	$LD	r6,`3*$BNSZ`(r4)
	$LD	r7,`3*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r10,r8
	adde	r11,r11,r9
	addze	r12,r12
	$LD	r6,`2*$BNSZ`(r4)
	$LD	r7,`4*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r10,r8
	adde	r11,r11,r9
	addze	r12,r12
	$LD	r6,`1*$BNSZ`(r4)
	$LD	r7,`5*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r10,r8
	adde	r11,r11,r9
	addze	r12,r12
	$LD	r6,`0*$BNSZ`(r4)
	$LD	r7,`6*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r10,r8
	adde	r11,r11,r9
	addze	r12,r12
	$ST	r10,`6*$BNSZ`(r3)
	$LD	r7,`7*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r11,r8
	adde	r12,r12,r9
	addze	r10,r0
	$LD	r6,`1*$BNSZ`(r4)
	$LD	r7,`6*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r11,r8
	adde	r12,r12,r9
	addze	r10,r10
	$LD	r6,`2*$BNSZ`(r4)
	$LD	r7,`5*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r11,r8
	adde	r12,r12,r9
	addze	r10,r10
	$LD	r6,`3*$BNSZ`(r4)
	$LD	r7,`4*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r11,r8
	adde	r12,r12,r9
	addze	r10,r10
	$LD	r6,`4*$BNSZ`(r4)
	$LD	r7,`3*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r11,r8
	adde	r12,r12,r9
	addze	r10,r10
	$LD	r6,`5*$BNSZ`(r4)
	$LD	r7,`2*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r11,r8
	adde	r12,r12,r9
	addze	r10,r10
	$LD	r6,`6*$BNSZ`(r4)
	$LD	r7,`1*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r11,r8
	adde	r12,r12,r9
	addze	r10,r10
	$LD	r6,`7*$BNSZ`(r4)
	$LD	r7,`0*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r11,r8
	adde	r12,r12,r9
	addze	r10,r10
	$ST	r11,`7*$BNSZ`(r3)
	$LD	r7,`1*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r12,r8
	adde	r10,r10,r9
	addze	r11,r0
	$LD	r6,`6*$BNSZ`(r4)
	$LD	r7,`2*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r12,r8
	adde	r10,r10,r9
	addze	r11,r11
	$LD	r6,`5*$BNSZ`(r4)
	$LD	r7,`3*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r12,r8
	adde	r10,r10,r9
	addze	r11,r11
	$LD	r6,`4*$BNSZ`(r4)
	$LD	r7,`4*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r12,r8
	adde	r10,r10,r9
	addze	r11,r11
	$LD	r6,`3*$BNSZ`(r4)
	$LD	r7,`5*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r12,r8
	adde	r10,r10,r9
	addze	r11,r11
	$LD	r6,`2*$BNSZ`(r4)
	$LD	r7,`6*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r12,r8
	adde	r10,r10,r9
	addze	r11,r11
	$LD	r6,`1*$BNSZ`(r4)
	$LD	r7,`7*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r12,r8
	adde	r10,r10,r9
	addze	r11,r11
	$ST	r12,`8*$BNSZ`(r3)
	$LD	r6,`2*$BNSZ`(r4)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r10,r8
	adde	r11,r11,r9
	addze	r12,r0
	$LD	r6,`3*$BNSZ`(r4)
	$LD	r7,`6*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r10,r8
	adde	r11,r11,r9
	addze	r12,r12
	$LD	r6,`4*$BNSZ`(r4)
	$LD	r7,`5*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r10,r8
	adde	r11,r11,r9
	addze	r12,r12
	$LD	r6,`5*$BNSZ`(r4)
	$LD	r7,`4*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r10,r8
	adde	r11,r11,r9
	addze	r12,r12
	$LD	r6,`6*$BNSZ`(r4)
	$LD	r7,`3*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r10,r8
	adde	r11,r11,r9
	addze	r12,r12
	$LD	r6,`7*$BNSZ`(r4)
	$LD	r7,`2*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r10,r8
	adde	r11,r11,r9
	addze	r12,r12
	$ST	r10,`9*$BNSZ`(r3)
	$LD	r7,`3*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r11,r8
	adde	r12,r12,r9
	addze	r10,r0
	$LD	r6,`6*$BNSZ`(r4)
	$LD	r7,`4*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r11,r8
	adde	r12,r12,r9
	addze	r10,r10
	$LD	r6,`5*$BNSZ`(r4)
	$LD	r7,`5*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r11,r8
	adde	r12,r12,r9
	addze	r10,r10
	$LD	r6,`4*$BNSZ`(r4)
	$LD	r7,`6*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r11,r8
	adde	r12,r12,r9
	addze	r10,r10
	$LD	r6,`3*$BNSZ`(r4)
	$LD	r7,`7*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r11,r8
	adde	r12,r12,r9
	addze	r10,r10
	$ST	r11,`10*$BNSZ`(r3)
	$LD	r6,`4*$BNSZ`(r4)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r12,r8
	adde	r10,r10,r9
	addze	r11,r0
	$LD	r6,`5*$BNSZ`(r4)
	$LD	r7,`6*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r12,r8
	adde	r10,r10,r9
	addze	r11,r11
	$LD	r6,`6*$BNSZ`(r4)
	$LD	r7,`5*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r12,r8
	adde	r10,r10,r9
	addze	r11,r11
	$LD	r6,`7*$BNSZ`(r4)
	$LD	r7,`4*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r12,r8
	adde	r10,r10,r9
	addze	r11,r11
	$ST	r12,`11*$BNSZ`(r3)
	$LD	r7,`5*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r10,r8
	adde	r11,r11,r9
	addze	r12,r0
	$LD	r6,`6*$BNSZ`(r4)
	$LD	r7,`6*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r10,r8
	adde	r11,r11,r9
	addze	r12,r12
	$LD	r6,`5*$BNSZ`(r4)
	$LD	r7,`7*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r10,r10,r8
	adde	r11,r11,r9
	addze	r12,r12
	$ST	r10,`12*$BNSZ`(r3)
	$LD	r6,`6*$BNSZ`(r4)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r11,r8
	adde	r12,r12,r9
	addze	r10,r0
	$LD	r6,`7*$BNSZ`(r4)
	$LD	r7,`6*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r11,r11,r8
	adde	r12,r12,r9
	addze	r10,r10
	$ST	r11,`13*$BNSZ`(r3)
	$LD	r7,`7*$BNSZ`(r5)
	$UMULL	r8,r6,r7
	$UMULH	r9,r6,r7
	addc	r12,r12,r8
	adde	r10,r10,r9
	$ST	r12,`14*$BNSZ`(r3)
	$ST	r10,`15*$BNSZ`(r3)
	bclr	BO_ALWAYS,CR0_LT
	.long	0x00000000
	.align	4

.bn_sub_words:
	xor	r0,r0,r0
	subfc.	r7,r0,r6
	bc	BO_IF,CR0_EQ,Lppcasm_sub_adios
	addi	r4,r4,-$BNSZ
	addi	r3,r3,-$BNSZ
	addi	r5,r5,-$BNSZ
	mtctr	r6
Lppcasm_sub_mainloop:
	$LDU	r7,$BNSZ(r4)
	$LDU	r8,$BNSZ(r5)
	subfe	r6,r8,r7
	$STU	r6,$BNSZ(r3)
	bc	BO_dCTR_NZERO,CR0_EQ,Lppcasm_sub_mainloop
Lppcasm_sub_adios:
	subfze	r3,r0
	andi.	r3,r3,1
	bclr	BO_ALWAYS,CR0_LT
	.long	0x00000000
	.align	4

.bn_add_words:
	xor	r0,r0,r0
	addic.	r6,r6,0
	bc	BO_IF,CR0_EQ,Lppcasm_add_adios
	addi	r4,r4,-$BNSZ
	addi	r3,r3,-$BNSZ
	addi	r5,r5,-$BNSZ
	mtctr	r6
Lppcasm_add_mainloop:
	$LDU	r7,$BNSZ(r4)
	$LDU	r8,$BNSZ(r5)
	adde	r8,r7,r8
	$STU	r8,$BNSZ(r3)
	bc	BO_dCTR_NZERO,CR0_EQ,Lppcasm_add_mainloop
Lppcasm_add_adios:
	addze	r3,r0
	bclr	BO_ALWAYS,CR0_LT
	.long	0x00000000
	.align	4

.bn_div_words:
	$UCMPI	0,r5,0
	bc	BO_IF_NOT,CR0_EQ,Lppcasm_div1
	li	r3,-1
	bclr	BO_ALWAYS,CR0_LT
Lppcasm_div1:
	xor	r0,r0,r0
	li	r8,$BITS
	$CNTLZ.	r7,r5
	bc	BO_IF,CR0_EQ,Lppcasm_div2
	subf	r8,r7,r8
	$SHR.	r9,r3,r8
	$TR	16,r9,r0
Lppcasm_div2:
	$UCMP	0,r3,r5
	bc	BO_IF,CR0_LT,Lppcasm_div3
	subf	r3,r5,r3
Lppcasm_div3:
	cmpi	0,0,r7,0
	bc	BO_IF,CR0_EQ,Lppcasm_div4
	$SHL	r3,r3,r7
	$SHR	r8,r4,r8
	$SHL	r5,r5,r7
	or	r3,r3,r8
	$SHL	r4,r4,r7
Lppcasm_div4:
	$SHRI	r9,r5,`$BITS/2`
	li	r6,2
	mtctr	r6
Lppcasm_divouterloop:
	$SHRI	r8,r3,`$BITS/2`
	$SHRI	r11,r4,`$BITS/2`
	$UCMP	0,r8,r9
	bc	BO_IF_NOT,CR0_EQ,Lppcasm_div5
	li	r8,-1
	$CLRU	r8,r8,`$BITS/2`
	b	Lppcasm_div6
Lppcasm_div5:
	$UDIV	r8,r3,r9
Lppcasm_div6:
	$UMULL	r12,r9,r8
	$CLRU	r10,r5,`$BITS/2`
	$UMULL	r6,r8,r10
Lppcasm_divinnerloop:
	subf	r10,r12,r3
	$SHRI	r7,r10,`$BITS/2`
	addic.	r7,r7,0
	$SHLI	r7,r10,`$BITS/2`
	or	r7,r7,r11
	$UCMP	1,r6,r7
	bc	BO_IF_NOT,CR0_EQ,Lppcasm_divinnerexit
	bc	BO_IF_NOT,CR1_FEX,Lppcasm_divinnerexit
	addi	r8,r8,-1
	subf	r12,r9,r12
	$CLRU	r10,r5,`$BITS/2`
	subf	r6,r10,r6
	b	Lppcasm_divinnerloop
Lppcasm_divinnerexit:
	$SHRI	r10,r6,`$BITS/2`
	$SHLI	r11,r6,`$BITS/2`
	$UCMP	1,r4,r11
	add	r12,r12,r10
	bc	BO_IF_NOT,CR1_FX,Lppcasm_div7
	addi	r12,r12,1
Lppcasm_div7:
	subf	r11,r11,r4
	$UCMP	1,r3,r12
	bc	BO_IF_NOT,CR1_FX,Lppcasm_div8
	addi	r8,r8,-1
	add	r3,r5,r3
Lppcasm_div8:
	subf	r12,r12,r3
	$SHLI	r4,r11,`$BITS/2`
	$INSR	r11,r12,`$BITS/2`,`$BITS/2`
	$ROTL	r3,r11,`$BITS/2`
	bc	BO_dCTR_ZERO,CR0_EQ,Lppcasm_div9
	$SHLI	r0,r8,`$BITS/2`
	b	Lppcasm_divouterloop
Lppcasm_div9:
	or	r3,r8,r0
	bclr	BO_ALWAYS,CR0_LT
	.long	0x00000000
	.align	4

.bn_sqr_words:
	addic.	r5,r5,0
	bc	BO_IF,CR0_EQ,Lppcasm_sqr_adios
	addi	r4,r4,-$BNSZ
	addi	r3,r3,-$BNSZ
	mtctr	r5
Lppcasm_sqr_mainloop:
	$LDU	r6,$BNSZ(r4)
	$UMULL	r7,r6,r6
	$UMULH	r8,r6,r6
	$STU	r7,$BNSZ(r3)
	$STU	r8,$BNSZ(r3)
	bc	BO_dCTR_NZERO,CR0_EQ,Lppcasm_sqr_mainloop
Lppcasm_sqr_adios:
	bclr	BO_ALWAYS,CR0_LT
	.long	0x00000000
	.align	4

.bn_mul_words:
	xor	r0,r0,r0
	xor	r12,r12,r12
	rlwinm.	r7,r5,30,2,31
	bc	BO_IF,CR0_EQ,Lppcasm_mw_REM
	mtctr	r7
Lppcasm_mw_LOOP:
	$LD	r8,`0*$BNSZ`(r4)
	$UMULL	r9,r6,r8
	$UMULH	r10,r6,r8
	addc	r9,r9,r12
	$ST	r9,`0*$BNSZ`(r3)
	$LD	r8,`1*$BNSZ`(r4)
	$UMULL	r11,r6,r8
	$UMULH	r12,r6,r8
	adde	r11,r11,r10
	$ST	r11,`1*$BNSZ`(r3)
	$LD	r8,`2*$BNSZ`(r4)
	$UMULL	r9,r6,r8
	$UMULH	r10,r6,r8
	adde	r9,r9,r12
	$ST	r9,`2*$BNSZ`(r3)
	$LD	r8,`3*$BNSZ`(r4)
	$UMULL	r11,r6,r8
	$UMULH	r12,r6,r8
	adde	r11,r11,r10
	addze	r12,r12
	$ST	r11,`3*$BNSZ`(r3)
	addi	r3,r3,`4*$BNSZ`
	addi	r4,r4,`4*$BNSZ`
	bc	BO_dCTR_NZERO,CR0_EQ,Lppcasm_mw_LOOP
Lppcasm_mw_REM:
	andi.	r5,r5,0x3
	bc	BO_IF,CR0_EQ,Lppcasm_mw_OVER
	$LD	r8,`0*$BNSZ`(r4)
	$UMULL	r9,r6,r8
	$UMULH	r10,r6,r8
	addc	r9,r9,r12
	addze	r10,r10
	$ST	r9,`0*$BNSZ`(r3)
	addi	r12,r10,0
	addi	r5,r5,-1
	cmpli	0,0,r5,0
	bc	BO_IF,CR0_EQ,Lppcasm_mw_OVER
	$LD	r8,`1*$BNSZ`(r4)
	$UMULL	r9,r6,r8
	$UMULH	r10,r6,r8
	addc	r9,r9,r12
	addze	r10,r10
	$ST	r9,`1*$BNSZ`(r3)
	addi	r12,r10,0
	addi	r5,r5,-1
	cmpli	0,0,r5,0
	bc	BO_IF,CR0_EQ,Lppcasm_mw_OVER
	$LD	r8,`2*$BNSZ`(r4)
	$UMULL	r9,r6,r8
	$UMULH	r10,r6,r8
	addc	r9,r9,r12
	addze	r10,r10
	$ST	r9,`2*$BNSZ`(r3)
	addi	r12,r10,0
Lppcasm_mw_OVER:
	addi	r3,r12,0
	bclr	BO_ALWAYS,CR0_LT
	.long	0x00000000
	.align	4

.bn_mul_add_words:
	xor	r0,r0,r0
	xor	r12,r12,r12
	rlwinm.	r7,r5,30,2,31
	bc	BO_IF,CR0_EQ,Lppcasm_maw_leftover
	mtctr	r7
Lppcasm_maw_mainloop:
	$LD	r8,`0*$BNSZ`(r4)
	$LD	r11,`0*$BNSZ`(r3)
	$UMULL	r9,r6,r8
	$UMULH	r10,r6,r8
	addc	r9,r9,r12
	addze	r10,r10
	addc	r9,r9,r11
	$ST	r9,`0*$BNSZ`(r3)
	$LD	r8,`1*$BNSZ`(r4)
	$LD	r9,`1*$BNSZ`(r3)
	$UMULL	r11,r6,r8
	$UMULH	r12,r6,r8
	adde	r11,r11,r10
	addze	r12,r12
	addc	r11,r11,r9
	$ST	r11,`1*$BNSZ`(r3)
	$LD	r8,`2*$BNSZ`(r4)
	$UMULL	r9,r6,r8
	$LD	r11,`2*$BNSZ`(r3)
	$UMULH	r10,r6,r8
	adde	r9,r9,r12
	addze	r10,r10
	addc	r9,r9,r11
	$ST	r9,`2*$BNSZ`(r3)
	$LD	r8,`3*$BNSZ`(r4)
	$UMULL	r11,r6,r8
	$LD	r9,`3*$BNSZ`(r3)
	$UMULH	r12,r6,r8
	adde	r11,r11,r10
	addze	r12,r12
	addc	r11,r11,r9
	addze	r12,r12
	$ST	r11,`3*$BNSZ`(r3)
	addi	r3,r3,`4*$BNSZ`
	addi	r4,r4,`4*$BNSZ`
	bc	BO_dCTR_NZERO,CR0_EQ,Lppcasm_maw_mainloop
Lppcasm_maw_leftover:
	andi.	r5,r5,0x3
	bc	BO_IF,CR0_EQ,Lppcasm_maw_adios
	addi	r3,r3,-$BNSZ
	addi	r4,r4,-$BNSZ
	mtctr	r5
	$LDU	r8,$BNSZ(r4)
	$UMULL	r9,r6,r8
	$UMULH	r10,r6,r8
	$LDU	r11,$BNSZ(r3)
	addc	r9,r9,r11
	addze	r10,r10
	addc	r9,r9,r12
	addze	r12,r10
	$ST	r9,0(r3)
	bc	BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios
	$LDU	r8,$BNSZ(r4)
	$UMULL	r9,r6,r8
	$UMULH	r10,r6,r8
	$LDU	r11,$BNSZ(r3)
	addc	r9,r9,r11
	addze	r10,r10
	addc	r9,r9,r12
	addze	r12,r10
	$ST	r9,0(r3)
	bc	BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios
	$LDU	r8,$BNSZ(r4)
	$UMULL	r9,r6,r8
	$UMULH	r10,r6,r8
	$LDU	r11,$BNSZ(r3)
	addc	r9,r9,r11
	addze	r10,r10
	addc	r9,r9,r12
	addze	r12,r10
	$ST	r9,0(r3)
Lppcasm_maw_adios:
	addi	r3,r12,0
	bclr	BO_ALWAYS,CR0_LT
	.long	0x00000000
	.align	4
EOF

    # Evaluate every backtick-quoted arithmetic expression in place.  The
    # expressions come only from the template above, never from user input.
    $data =~ s/\`([^\`]*)\`/eval $1/gem;

    # Rewrite "cmplw X,Y..." into the four-operand "cmpl X,0,Y..." form.
    $data =~ s/^(\s*)cmplw(\s+)([^,]+),(.*)/$1cmpl$2$3,0,$4/gm;

    return $data;
}