bn-alpha.pl   [plain text]


#!/usr/local/bin/perl
# I have this in perl so I can use more usefull register names and then convert
# them into alpha registers.
#

$d=&data();
$d =~ s/CC/0/g;
$d =~ s/R1/1/g;
$d =~ s/R2/2/g;
$d =~ s/R3/3/g;
$d =~ s/R4/4/g;
$d =~ s/L1/5/g;
$d =~ s/L2/6/g;
$d =~ s/L3/7/g;
$d =~ s/L4/8/g;
$d =~ s/O1/22/g;
$d =~ s/O2/23/g;
$d =~ s/O3/24/g;
$d =~ s/O4/25/g;
$d =~ s/A1/20/g;
$d =~ s/A2/21/g;
$d =~ s/A3/27/g;
$d =~ s/A4/28/g;
if (0){
}

print $d;

sub data
	{
	local($data)=<<'EOF';

 # DEC Alpha assember
 # The bn_div_words is actually gcc output but the other parts are hand done.
 # Thanks to tzeruch@ceddec.com for sending me the gcc output for
 # bn_div_words.
 # I've gone back and re-done most of routines.
 # The key thing to remeber for the 164 CPU is that while a
 # multiply operation takes 8 cycles, another one can only be issued
 # after 4 cycles have elapsed.  I've done modification to help
 # improve this.  Also, normally, a ld instruction will not be available
 # for about 3 cycles.
	.file	1 "bn_asm.c"
	.set noat
gcc2_compiled.:
__gnu_compiled_c:
	.text
	.align 3
	.globl bn_mul_add_words
	.ent bn_mul_add_words
bn_mul_add_words:
bn_mul_add_words..ng:
	.frame $30,0,$26,0
	.prologue 0
	.align 5
	subq	$18,4,$18
	bis	$31,$31,$CC
	blt	$18,$43		# if we are -1, -2, -3 or -4 goto tail code
	ldq	$A1,0($17)	# 1 1
	ldq	$R1,0($16)	# 1 1
	.align 3
$42:
	mulq	$A1,$19,$L1	# 1 2 1	######
	ldq	$A2,8($17)	# 2 1
	ldq	$R2,8($16)	# 2 1
	umulh	$A1,$19,$A1	# 1 2	######
	ldq	$A3,16($17)	# 3 1
	ldq	$R3,16($16)	# 3 1
	mulq	$A2,$19,$L2	# 2 2 1	######
	 ldq	$A4,24($17)	# 4 1
	addq	$R1,$L1,$R1	# 1 2 2
	 ldq	$R4,24($16)	# 4 1
	umulh	$A2,$19,$A2	# 2 2	######
	 cmpult	$R1,$L1,$O1	# 1 2 3 1
	addq	$A1,$O1,$A1	# 1 3 1
	 addq	$R1,$CC,$R1	# 1 2 3 1
	mulq	$A3,$19,$L3	# 3 2 1	######
	 cmpult	$R1,$CC,$CC	# 1 2 3 2
	addq	$R2,$L2,$R2	# 2 2 2
	 addq	$A1,$CC,$CC	# 1 3 2 
	cmpult	$R2,$L2,$O2	# 2 2 3 1
	 addq	$A2,$O2,$A2	# 2 3 1
	umulh	$A3,$19,$A3	# 3 2	######
	 addq	$R2,$CC,$R2	# 2 2 3 1
	cmpult	$R2,$CC,$CC	# 2 2 3 2
	 subq	$18,4,$18
	mulq	$A4,$19,$L4	# 4 2 1	######
	 addq	$A2,$CC,$CC	# 2 3 2 
	addq	$R3,$L3,$R3	# 3 2 2
	 addq	$16,32,$16
	cmpult	$R3,$L3,$O3	# 3 2 3 1
	 stq	$R1,-32($16)	# 1 2 4
	umulh	$A4,$19,$A4	# 4 2	######
	 addq	$A3,$O3,$A3	# 3 3 1
	addq	$R3,$CC,$R3	# 3 2 3 1
	 stq	$R2,-24($16)	# 2 2 4
	cmpult	$R3,$CC,$CC	# 3 2 3 2
	 stq	$R3,-16($16)	# 3 2 4
	addq	$R4,$L4,$R4	# 4 2 2
	 addq	$A3,$CC,$CC	# 3 3 2 
	cmpult	$R4,$L4,$O4	# 4 2 3 1
	 addq	$17,32,$17
	addq	$A4,$O4,$A4	# 4 3 1
	 addq	$R4,$CC,$R4	# 4 2 3 1
	cmpult	$R4,$CC,$CC	# 4 2 3 2
	 stq	$R4,-8($16)	# 4 2 4
	addq	$A4,$CC,$CC	# 4 3 2 
	 blt	$18,$43

	ldq	$A1,0($17)	# 1 1
	ldq	$R1,0($16)	# 1 1

	br	$42

	.align 4
$45:
	ldq	$A1,0($17)	# 4 1
	ldq	$R1,0($16)	# 4 1
	mulq	$A1,$19,$L1	# 4 2 1
	subq	$18,1,$18
	addq	$16,8,$16
	addq	$17,8,$17
	umulh	$A1,$19,$A1	# 4 2
	addq	$R1,$L1,$R1	# 4 2 2
	cmpult	$R1,$L1,$O1	# 4 2 3 1
	addq	$A1,$O1,$A1	# 4 3 1
	addq	$R1,$CC,$R1	# 4 2 3 1
	cmpult	$R1,$CC,$CC	# 4 2 3 2
	addq	$A1,$CC,$CC	# 4 3 2 
	stq	$R1,-8($16)	# 4 2 4
	bgt	$18,$45
	ret	$31,($26),1	# else exit

	.align 4
$43:
	addq	$18,4,$18
	bgt	$18,$45		# goto tail code
	ret	$31,($26),1	# else exit

	.end bn_mul_add_words
	.align 3
	.globl bn_mul_words
	.ent bn_mul_words
bn_mul_words:
bn_mul_words..ng:
	.frame $30,0,$26,0
	.prologue 0
	.align 5
	subq	$18,4,$18
	bis	$31,$31,$CC
	blt	$18,$143	# if we are -1, -2, -3 or -4 goto tail code
	ldq	$A1,0($17)	# 1 1
	.align 3
$142:

	mulq	$A1,$19,$L1	# 1 2 1	#####
	 ldq	$A2,8($17)	# 2 1
	 ldq	$A3,16($17)	# 3 1
	umulh	$A1,$19,$A1	# 1 2	#####
	 ldq	$A4,24($17)	# 4 1
	mulq	$A2,$19,$L2	# 2 2 1	#####
	 addq	$L1,$CC,$L1	# 1 2 3 1
	subq	$18,4,$18
	 cmpult	$L1,$CC,$CC	# 1 2 3 2
	umulh	$A2,$19,$A2	# 2 2	#####
	 addq	$A1,$CC,$CC	# 1 3 2 
	addq	$17,32,$17
	 addq	$L2,$CC,$L2	# 2 2 3 1
	mulq	$A3,$19,$L3	# 3 2 1	#####
	 cmpult	$L2,$CC,$CC	# 2 2 3 2
	addq	$A2,$CC,$CC	# 2 3 2 
	 addq	$16,32,$16
	umulh	$A3,$19,$A3	# 3 2	#####
	 stq	$L1,-32($16)	# 1 2 4
	mulq	$A4,$19,$L4	# 4 2 1	#####
	 addq	$L3,$CC,$L3	# 3 2 3 1
	stq	$L2,-24($16)	# 2 2 4
	 cmpult	$L3,$CC,$CC	# 3 2 3 2
	umulh	$A4,$19,$A4	# 4 2	#####
	 addq	$A3,$CC,$CC	# 3 3 2 
	stq	$L3,-16($16)	# 3 2 4
	 addq	$L4,$CC,$L4	# 4 2 3 1
	cmpult	$L4,$CC,$CC	# 4 2 3 2

	addq	$A4,$CC,$CC	# 4 3 2 

	stq	$L4,-8($16)	# 4 2 4

	blt	$18,$143

	ldq	$A1,0($17)	# 1 1

	br	$142

	.align 4
$145:
	ldq	$A1,0($17)	# 4 1
	mulq	$A1,$19,$L1	# 4 2 1
	subq	$18,1,$18
	umulh	$A1,$19,$A1	# 4 2
	addq	$L1,$CC,$L1	# 4 2 3 1
	 addq	$16,8,$16
	cmpult	$L1,$CC,$CC	# 4 2 3 2
	 addq	$17,8,$17
	addq	$A1,$CC,$CC	# 4 3 2 
	stq	$L1,-8($16)	# 4 2 4

	bgt	$18,$145
	ret	$31,($26),1	# else exit

	.align 4
$143:
	addq	$18,4,$18
	bgt	$18,$145	# goto tail code
	ret	$31,($26),1	# else exit

	.end bn_mul_words
	.align 3
	.globl bn_sqr_words
	.ent bn_sqr_words
bn_sqr_words:
bn_sqr_words..ng:
	.frame $30,0,$26,0
	.prologue 0

	subq	$18,4,$18
	blt	$18,$543	# if we are -1, -2, -3 or -4 goto tail code
	ldq	$A1,0($17)	# 1 1
	.align 3
$542:
	mulq	$A1,$A1,$L1		######
	 ldq	$A2,8($17)	# 1 1
	subq	$18,4
 	umulh	$A1,$A1,$R1		######
	ldq	$A3,16($17)	# 1 1
	mulq	$A2,$A2,$L2		######
	ldq	$A4,24($17)	# 1 1
	stq	$L1,0($16)	# r[0]
 	umulh	$A2,$A2,$R2		######
	stq	$R1,8($16)	# r[1]
	mulq	$A3,$A3,$L3		######
	stq	$L2,16($16)	# r[0]
 	umulh	$A3,$A3,$R3		######
	stq	$R2,24($16)	# r[1]
	mulq	$A4,$A4,$L4		######
	stq	$L3,32($16)	# r[0]
 	umulh	$A4,$A4,$R4		######
	stq	$R3,40($16)	# r[1]

 	addq	$16,64,$16
 	addq	$17,32,$17
	stq	$L4,-16($16)	# r[0]
	stq	$R4,-8($16)	# r[1]

	blt	$18,$543
	ldq	$A1,0($17)	# 1 1
 	br 	$542

$442:
	ldq	$A1,0($17)   # a[0]
	mulq	$A1,$A1,$L1  # a[0]*w low part       r2
	addq	$16,16,$16
	addq	$17,8,$17
	subq	$18,1,$18
        umulh	$A1,$A1,$R1  # a[0]*w high part       r3
	stq	$L1,-16($16)   # r[0]
        stq	$R1,-8($16)   # r[1]

	bgt	$18,$442
	ret	$31,($26),1	# else exit

	.align 4
$543:
	addq	$18,4,$18
	bgt	$18,$442	# goto tail code
	ret	$31,($26),1	# else exit
	.end bn_sqr_words

	.align 3
	.globl bn_add_words
	.ent bn_add_words
bn_add_words:
bn_add_words..ng:
	.frame $30,0,$26,0
	.prologue 0

	subq	$19,4,$19
	bis	$31,$31,$CC	# carry = 0
	blt	$19,$900
	ldq	$L1,0($17)	# a[0]
	ldq	$R1,0($18)	# b[1]
	.align 3
$901:
	addq	$R1,$L1,$R1	# r=a+b;
	 ldq	$L2,8($17)	# a[1]
	cmpult	$R1,$L1,$O1	# did we overflow?
	 ldq	$R2,8($18)	# b[1]
	addq	$R1,$CC,$R1	# c+= overflow
	 ldq	$L3,16($17)	# a[2]
	cmpult	$R1,$CC,$CC	# overflow?
	 ldq	$R3,16($18)	# b[2]
	addq	$CC,$O1,$CC
	 ldq	$L4,24($17)	# a[3]
	addq	$R2,$L2,$R2	# r=a+b;
	 ldq	$R4,24($18)	# b[3]
	cmpult	$R2,$L2,$O2	# did we overflow?
	 addq	$R3,$L3,$R3	# r=a+b;
	addq	$R2,$CC,$R2	# c+= overflow
	 cmpult	$R3,$L3,$O3	# did we overflow?
	cmpult	$R2,$CC,$CC	# overflow?
	 addq	$R4,$L4,$R4	# r=a+b;
	addq	$CC,$O2,$CC
	 cmpult	$R4,$L4,$O4	# did we overflow?
	addq	$R3,$CC,$R3	# c+= overflow
	 stq	$R1,0($16)	# r[0]=c
	cmpult	$R3,$CC,$CC	# overflow?
	 stq	$R2,8($16)	# r[1]=c
	addq	$CC,$O3,$CC
	 stq	$R3,16($16)	# r[2]=c
	addq	$R4,$CC,$R4	# c+= overflow
	 subq	$19,4,$19	# loop--
	cmpult	$R4,$CC,$CC	# overflow?
	 addq	$17,32,$17	# a++
	addq	$CC,$O4,$CC
	 stq	$R4,24($16)	# r[3]=c
	addq	$18,32,$18	# b++
	 addq	$16,32,$16	# r++

	blt	$19,$900
	 ldq	$L1,0($17)	# a[0]
	ldq	$R1,0($18)	# b[1]
	 br	$901
	.align 4
$945:
	ldq	$L1,0($17)	# a[0]
	 ldq	$R1,0($18)	# b[1]
	addq	$R1,$L1,$R1	# r=a+b;
	 subq	$19,1,$19	# loop--
	addq	$R1,$CC,$R1	# c+= overflow
	 addq	$17,8,$17	# a++
	cmpult	$R1,$L1,$O1	# did we overflow?
	 cmpult	$R1,$CC,$CC	# overflow?
	addq	$18,8,$18	# b++
	 stq	$R1,0($16)	# r[0]=c
	addq	$CC,$O1,$CC
	 addq	$16,8,$16	# r++

	bgt	$19,$945
	ret	$31,($26),1	# else exit

$900:
	addq	$19,4,$19
	bgt	$19,$945	# goto tail code
	ret	$31,($26),1	# else exit
	.end bn_add_words

	.align 3
	.globl bn_sub_words
	.ent bn_sub_words
bn_sub_words:
bn_sub_words..ng:
	.frame $30,0,$26,0
	.prologue 0

	subq	$19,4,$19
	bis	$31,$31,$CC	# carry = 0
 br	$800
	blt	$19,$800
	ldq	$L1,0($17)	# a[0]
	ldq	$R1,0($18)	# b[1]
	.align 3
$801:
	addq	$R1,$L1,$R1	# r=a+b;
	 ldq	$L2,8($17)	# a[1]
	cmpult	$R1,$L1,$O1	# did we overflow?
	 ldq	$R2,8($18)	# b[1]
	addq	$R1,$CC,$R1	# c+= overflow
	 ldq	$L3,16($17)	# a[2]
	cmpult	$R1,$CC,$CC	# overflow?
	 ldq	$R3,16($18)	# b[2]
	addq	$CC,$O1,$CC
	 ldq	$L4,24($17)	# a[3]
	addq	$R2,$L2,$R2	# r=a+b;
	 ldq	$R4,24($18)	# b[3]
	cmpult	$R2,$L2,$O2	# did we overflow?
	 addq	$R3,$L3,$R3	# r=a+b;
	addq	$R2,$CC,$R2	# c+= overflow
	 cmpult	$R3,$L3,$O3	# did we overflow?
	cmpult	$R2,$CC,$CC	# overflow?
	 addq	$R4,$L4,$R4	# r=a+b;
	addq	$CC,$O2,$CC
	 cmpult	$R4,$L4,$O4	# did we overflow?
	addq	$R3,$CC,$R3	# c+= overflow
	 stq	$R1,0($16)	# r[0]=c
	cmpult	$R3,$CC,$CC	# overflow?
	 stq	$R2,8($16)	# r[1]=c
	addq	$CC,$O3,$CC
	 stq	$R3,16($16)	# r[2]=c
	addq	$R4,$CC,$R4	# c+= overflow
	 subq	$19,4,$19	# loop--
	cmpult	$R4,$CC,$CC	# overflow?
	 addq	$17,32,$17	# a++
	addq	$CC,$O4,$CC
	 stq	$R4,24($16)	# r[3]=c
	addq	$18,32,$18	# b++
	 addq	$16,32,$16	# r++

	blt	$19,$800
	 ldq	$L1,0($17)	# a[0]
	ldq	$R1,0($18)	# b[1]
	 br	$801
	.align 4
$845:
	ldq	$L1,0($17)	# a[0]
	 ldq	$R1,0($18)	# b[1]
	cmpult	$L1,$R1,$O1	# will we borrow?
	 subq	$L1,$R1,$R1	# r=a-b;
	subq	$19,1,$19	# loop--
	 cmpult  $R1,$CC,$O2	# will we borrow?
	subq	$R1,$CC,$R1	# c+= overflow
	 addq	$17,8,$17	# a++
	addq	$18,8,$18	# b++
	 stq	$R1,0($16)	# r[0]=c
	addq	$O2,$O1,$CC
	 addq	$16,8,$16	# r++

	bgt	$19,$845
	ret	$31,($26),1	# else exit

$800:
	addq	$19,4,$19
	bgt	$19,$845	# goto tail code
	ret	$31,($26),1	# else exit
	.end bn_sub_words

 #
 # What follows was taken directly from the C compiler with a few
 # hacks to redo the lables.
 #
.text
	.align 3
	.globl bn_div_words
	.ent bn_div_words
bn_div_words:
	ldgp $29,0($27)
bn_div_words..ng:
	lda $30,-48($30)
	.frame $30,48,$26,0
	stq $26,0($30)
	stq $9,8($30)
	stq $10,16($30)
	stq $11,24($30)
	stq $12,32($30)
	stq $13,40($30)
	.mask 0x4003e00,-48
	.prologue 1
	bis $16,$16,$9
	bis $17,$17,$10
	bis $18,$18,$11
	bis $31,$31,$13
	bis $31,2,$12
	bne $11,$119
	lda $0,-1
	br $31,$136
	.align 4
$119:
	bis $11,$11,$16
	jsr $26,BN_num_bits_word
	ldgp $29,0($26)
	subq $0,64,$1
	beq $1,$120
	bis $31,1,$1
	sll $1,$0,$1
	cmpule $9,$1,$1
	bne $1,$120
 #	lda $16,_IO_stderr_
 #	lda $17,$C32
 #	bis $0,$0,$18
 #	jsr $26,fprintf
 #	ldgp $29,0($26)
	jsr $26,abort
	ldgp $29,0($26)
	.align 4
$120:
	bis $31,64,$3
	cmpult $9,$11,$2
	subq $3,$0,$1
	addl $1,$31,$0
	subq $9,$11,$1
	cmoveq $2,$1,$9
	beq $0,$122
	zapnot $0,15,$2
	subq $3,$0,$1
	sll $11,$2,$11
	sll $9,$2,$3
	srl $10,$1,$1
	sll $10,$2,$10
	bis $3,$1,$9
$122:
	srl $11,32,$5
	zapnot $11,15,$6
	lda $7,-1
	.align 5
$123:
	srl $9,32,$1
	subq $1,$5,$1
	bne $1,$126
	zapnot $7,15,$27
	br $31,$127
	.align 4
$126:
	bis $9,$9,$24
	bis $5,$5,$25
	divqu $24,$25,$27
$127:
	srl $10,32,$4
	.align 5
$128:
	mulq $27,$5,$1
	subq $9,$1,$3
	zapnot $3,240,$1
	bne $1,$129
	mulq $6,$27,$2
	sll $3,32,$1
	addq $1,$4,$1
	cmpule $2,$1,$2
	bne $2,$129
	subq $27,1,$27
	br $31,$128
	.align 4
$129:
	mulq $27,$6,$1
	mulq $27,$5,$4
	srl $1,32,$3
	sll $1,32,$1
	addq $4,$3,$4
	cmpult $10,$1,$2
	subq $10,$1,$10
	addq $2,$4,$2
	cmpult $9,$2,$1
	bis $2,$2,$4
	beq $1,$134
	addq $9,$11,$9
	subq $27,1,$27
$134:
	subl $12,1,$12
	subq $9,$4,$9
	beq $12,$124
	sll $27,32,$13
	sll $9,32,$2
	srl $10,32,$1
	sll $10,32,$10
	bis $2,$1,$9
	br $31,$123
	.align 4
$124:
	bis $13,$27,$0
$136:
	ldq $26,0($30)
	ldq $9,8($30)
	ldq $10,16($30)
	ldq $11,24($30)
	ldq $12,32($30)
	ldq $13,40($30)
	addq $30,48,$30
	ret $31,($26),1
	.end bn_div_words
EOF
	return($data);
	}