; ------------------------------------------------------------------------- ; Copyright (c) 2001, Dr Brian Gladman , Worcester, UK. ; All rights reserved. ; ; LICENSE TERMS ; ; The free distribution and use of this software in both source and binary ; form is allowed (with or without changes) provided that: ; ; 1. distributions of this source code include the above copyright ; notice, this list of conditions and the following disclaimer; ; ; 2. distributions in binary form include the above copyright ; notice, this list of conditions and the following disclaimer ; in the documentation and/or other associated materials; ; ; 3. the copyright holder's name is not used to endorse products ; built using this software without specific written permission. ; ; DISCLAIMER ; ; This software is provided 'as is' with no explcit or implied warranties ; in respect of any properties, including, but not limited to, correctness ; and fitness for purpose. ; ------------------------------------------------------------------------- ; Issue Date: 15/01/2002 ; An AES (Rijndael) implementation for the Pentium MMX family using the NASM ; assembler . This version only implements ; the standard AES block length (128 bits, 16 bytes) with the same interface ; as that used in my C/C++ implementation. This code does not preserve the ; eax, ecx or edx registers or the artihmetic status flags. However, the ebx, ; esi, edi, and ebp registers are preserved across calls. Only encryption ; and decryption are implemented here, the key schedule code being that from ; compiling aes.c with USE_ASM defined. This code uses VC++ register saving ; conentions; if it is used with another compiler, its conventions for using ; and saving registers will need to be checked. section .text use32 ; aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1]); ; aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1]); global _aes_enc_blk global _aes_dec_blk extern _ft_tab extern _fl_tab extern _it_tab extern _il_tab ;%define USE_MMX ; include this to use MMX registers for temporary storage ;%define USE_EMMS ; include this if you make use of floating point operations %ifdef USE_MMX %ifdef USE_EMMS %define EMMS_ON %endif %endif tlen: equ 1024 ; length of each of 4 'xor' arrays (256 32-bit words) ; offsets to parameters with one register pushed onto stack in_blk: equ 8 ; input byte array address parameter out_blk:equ 12 ; output byte array address parameter ctx: equ 16 ; AES context structure ; offsets in context structure ksch: equ 0 ; encryption key schedule base address nrnd: equ 256 ; number of rounds nblk: equ 260 ; number of rounds ; register mapping for encrypt and decrypt subroutines %define r0 eax %define r1 ebx %define r2 ecx %define r3 edx %define r4 esi %define r5 edi %define r6 ebp %define eaxl al %define eaxh ah %define ebxl bl %define ebxh bh %define ecxl cl %define ecxh ch %define edxl dl %define edxh dh ; This macro takes a 32-bit word representing a column and uses ; each of its four bytes to index into four tables of 256 32-bit ; words to obtain values that are then xored into the appropriate ; output registers r0, r1, r4 or r5. ; Parameters: ; %1 out_state[0] ; %2 out_state[1] ; %3 out_state[2] ; %4 out_state[3] ; %5 table base address ; %6 input register for the round (destroyed) ; %7 scratch register for the round %macro do_col 7 movzx %7,%6l xor %1,[4*%7+%5] movzx %7,%6h shr %6,16 xor %2,[4*%7+%5+tlen] movzx %7,%6l movzx %6,%6h xor %3,[4*%7+%5+2*tlen] xor %4,[4*%6+%5+3*tlen] %endmacro ; initialise output registers from the key schedule %macro do_fcol 8 mov %1,[%8] movzx %7,%6l mov %2,[%8+12] xor %1,[4*%7+%5] mov %4,[%8+ 4] movzx %7,%6h shr %6,16 xor %2,[4*%7+%5+tlen] movzx %7,%6l movzx %6,%6h xor %4,[4*%6+%5+3*tlen] mov %6,%3 mov %3,[%8+ 8] xor %3,[4*%7+%5+2*tlen] %endmacro ; initialise output registers from the key schedule %macro do_icol 8 mov %1,[%8] movzx %7,%6l mov %2,[%8+ 4] xor %1,[4*%7+%5] mov %4,[%8+12] movzx %7,%6h shr %6,16 xor %2,[4*%7+%5+tlen] movzx %7,%6l movzx %6,%6h xor %4,[4*%6+%5+3*tlen] mov %6,%3 mov %3,[%8+ 8] xor %3,[4*%7+%5+2*tlen] %endmacro ; These macros implement either MMX or stack based local variables %ifdef USE_MMX %macro save 2 movd mm%1,%2 %endmacro %macro restore 2 movd %1,mm%2 %endmacro %else %macro save 2 mov [esp+4*%1],%2 %endmacro %macro restore 2 mov %1,[esp+4*%2] %endmacro %endif ; This macro performs a forward encryption cycle. It is entered with ; the first previous round column values in r0, r1, r4 and r5 and ; exits with the final values in the same registers, using the MMX ; registers mm0-mm1 for temporary storage %macro fwd_rnd 1-2 _ft_tab ; mov current column values into the MMX registers mov r2,r0 save 0,r1 save 1,r5 ; compute new column values do_fcol r0,r5,r4,r1, %2, r2,r3, %1 do_col r4,r1,r0,r5, %2, r2,r3 restore r2,0 do_col r1,r0,r5,r4, %2, r2,r3 restore r2,1 do_col r5,r4,r1,r0, %2, r2,r3 %endmacro ; This macro performs an inverse encryption cycle. It is entered with ; the first previous round column values in r0, r1, r4 and r5 and ; exits with the final values in the same registers, using the MMX ; registers mm0-mm1 for temporary storage %macro inv_rnd 1-2 _it_tab ; mov current column values into the MMX registers mov r2,r0 save 0,r1 save 1,r5 ; compute new column values do_icol r0,r1,r4,r5, %2, r2,r3, %1 do_col r4,r5,r0,r1, %2, r2,r3 restore r2,0 do_col r1,r4,r5,r0, %2, r2,r3 restore r2,1 do_col r5,r0,r1,r4, %2, r2,r3 %endmacro ; AES (Rijndael) Encryption Subroutine _aes_enc_blk: push ebp mov ebp,[esp+ctx] ; pointer to context xor eax,eax test [ebp+nblk],byte 1 je .0 cmp eax,[ebp+nrnd] ; encryption/decryption flags jne short .1 .0: pop ebp ret ; CAUTION: the order and the values used in these assigns ; rely on the register mappings .1: push ebx mov r2,[esp+in_blk+4] push esi mov r3,[ebp+nrnd] ; number of rounds push edi lea r6,[ebp+ksch] ; key pointer ; input four columns and xor in first round key mov r0,[r2] mov r1,[r2+4] mov r4,[r2+8] mov r5,[r2+12] xor r0,[r6] xor r1,[r6+4] xor r4,[r6+8] xor r5,[r6+12] %ifndef USE_MMX sub esp,8 ; space for register saves on stack %endif add r6,16 ; increment to next round key sub r3,10 je .4 ; 10 rounds for 128-bit key add r6,32 sub r3,2 je .3 ; 12 rounds for 128-bit key add r6,32 .2: fwd_rnd r6-64 ; 14 rounds for 128-bit key fwd_rnd r6-48 .3: fwd_rnd r6-32 ; 12 rounds for 128-bit key fwd_rnd r6-16 .4: fwd_rnd r6 ; 10 rounds for 128-bit key fwd_rnd r6+ 16 fwd_rnd r6+ 32 fwd_rnd r6+ 48 fwd_rnd r6+ 64 fwd_rnd r6+ 80 fwd_rnd r6+ 96 fwd_rnd r6+112 fwd_rnd r6+128 fwd_rnd r6+144,_fl_tab ; last round uses a different table ; move final values to the output array. CAUTION: the ; order of these assigns rely on the register mappings %ifndef USE_MMX add esp,8 %endif mov r6,[esp+out_blk+12] mov [r6+12],r5 pop edi mov [r6+8],r4 pop esi mov [r6+4],r1 pop ebx mov [r6],r0 pop ebp mov eax,1 %ifdef EMMS_ON emms %endif ret ; AES (Rijndael) Decryption Subroutine _aes_dec_blk: push ebp mov ebp,[esp+ctx] ; pointer to context xor eax,eax test [ebp+nblk],byte 2 je .0 cmp eax,[ebp+nrnd] ; encryption/decryption flags jne short .1 .0: pop ebp ret ; CAUTION: the order and the values used in these assigns ; rely on the register mappings .1: push ebx mov r2,[esp+in_blk+4] push esi mov r3,[ebp+nrnd] ; number of rounds push edi lea r6,[ebp+ksch] ; key pointer mov r0,r3 shl r0,4 add r6,r0 ; input four columns and xor in first round key mov r0,[r2] mov r1,[r2+4] mov r4,[r2+8] mov r5,[r2+12] xor r0,[r6] xor r1,[r6+4] xor r4,[r6+8] xor r5,[r6+12] %ifndef USE_MMX sub esp,8 ; space for register saves on stack %endif sub r6,16 ; increment to next round key sub r3,10 je .4 ; 10 rounds for 128-bit key sub r6,32 sub r3,2 je .3 ; 12 rounds for 128-bit key sub r6,32 .2: inv_rnd r6+64 ; 14 rounds for 128-bit key inv_rnd r6+48 .3: inv_rnd r6+32 ; 12 rounds for 128-bit key inv_rnd r6+16 .4: inv_rnd r6 ; 10 rounds for 128-bit key inv_rnd r6- 16 inv_rnd r6- 32 inv_rnd r6- 48 inv_rnd r6- 64 inv_rnd r6- 80 inv_rnd r6- 96 inv_rnd r6-112 inv_rnd r6-128 inv_rnd r6-144,_il_tab ; last round uses a different table ; move final values to the output array. CAUTION: the ; order of these assigns rely on the register mappings %ifndef USE_MMX add esp,8 %endif mov r6,[esp+out_blk+12] mov [r6+12],r5 pop edi mov [r6+8],r4 pop esi mov [r6+4],r1 pop ebx mov [r6],r0 pop ebp mov eax,1 %ifdef EMMS_ON emms %endif ret end