/*
* Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. The rights granted to you under the License
* may not be used to create, or enable the creation or redistribution of,
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
*
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/*
* ---------------------------------------------------------------------------
* Copyright (c) 2002, Dr Brian Gladman, Worcester, UK. All rights reserved.
*
* LICENSE TERMS
*
* The free distribution and use of this software in both source and binary
* form is allowed (with or without changes) provided that:
*
* 1. distributions of this source code include the above copyright
* notice, this list of conditions and the following disclaimer * 2. distributions in binary form include the above copyright
* notice, this list of conditions and the following disclaimer
* in the documentation and/or other associated materials * 3. the copyright holder's name is not used to endorse products
* built using this software without specific written permission.
*
* ALTERNATIVELY, provided that this notice is retained in full, this product
* may be distributed under the terms of the GNU General Public License (GPL),
* in which case the provisions of the GPL apply INSTEAD OF those given above.
*
* DISCLAIMER
*
* This software is provided 'as is' with no explicit or implied warranties
* in respect of its properties, including, but not limited to, correctness
* and/or fitness for purpose.
* ---------------------------------------------------------------------------
* Issue 31/01/2006
*
* This code requires either ASM_X86_V2 or ASM_X86_V2C to be set in aesopt.h
* and the same define to be set here as well. If AES_V2C is set this file
* requires the C files aeskey.c and aestab.c for support.
*
* This is a full assembler implementation covering encryption, decryption and
* key scheduling. It uses 2k bytes of tables but its encryption and decryption
* performance is very close to that obtained using large tables. Key schedule
* expansion is slower for both encryption and decryption but this is likely to
* be offset by the much smaller load that this version places on the processor
* cache. I acknowledge the contribution made by Daniel Bernstein to aspects of
* the design of the AES round function used here.
*
* This code provides the standard AES block size (128 bits, 16 bytes) and the
* three standard AES key sizes (128, 192 and 256 bits). It has the same call
* interface as my C implementation. The ebx, esi, edi and ebp registers are
* preserved across calls but eax, ecx and edx and the artihmetic status flags
* are not.
*/
#include <mach/i386/asm.h>
#define AES_128 /* define if AES with 128 bit keys is needed */
#define AES_192 /* define if AES with 192 bit keys is needed */
#define AES_256 /* define if AES with 256 bit keys is needed */
#define AES_VAR /* define if a variable key size is needed */
#define ENCRYPTION /* define if encryption is needed */
#define DECRYPTION /* define if decryption is needed */
#define AES_REV_DKS /* define if key decryption schedule is reversed */
#ifndef ASM_X86_V2C
#define ENCRYPTION_KEY_SCHEDULE /* define if enc. key expansion is needed */
#define DECRYPTION_KEY_SCHEDULE /* define if dec. key expansion is needed */
#endif
/*
* The encryption key schedule has the following in memory layout where N is the
* number of rounds (10, 12 or 14):
*
* lo: | input key (round 0) | * | encryption round 2 |
* ....
* | encryption round N-1 |
* hi: | encryption round N |
*
* The decryption key schedule is normally set up so that it has the same
* layout as above by actually reversing the order of the encryption key
* schedule in memory (this happens when AES_REV_DKS is set):
*
* lo: | decryption round 0 | = | encryption round N |
* | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ]
* | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ]
* .... ....
* | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ]
* hi: | decryption round N | = | input key (round 0) |
*
* with rounds except the first and last modified using inv_mix_column()
* But if AES_REV_DKS is NOT set the order of keys is left as it is for
* encryption so that it has to be accessed in reverse when used for
* decryption (although the inverse mix column modifications are done)
*
* lo: | decryption round 0 | = | input key (round 0) |
* | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ]
* | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ]
* .... ....
* | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ]
* hi: | decryption round N | = | encryption round N |
*
* This layout is faster when the assembler key scheduling provided here
* is used.
*/
/* End of user defines */
#ifdef AES_VAR
#ifndef AES_128
#define AES_128
#endif
#ifndef AES_192
#define AES_192
#endif
#ifndef AES_256
#define AES_256
#endif
#endif
#ifdef AES_VAR
#define KS_LENGTH 60
#else
#ifdef AES_256
#define KS_LENGTH 60
#else
#ifdef AES_192
#define KS_LENGTH 52
#else
#define KS_LENGTH 44
#endif
#endif
#endif
/*
* These macros implement stack based local variables
*/
#define save(r1) \
movl %r1, (%esp)#define restore(r1) \
movl (%esp), %r1#define do_call(f, n) \
call EXT(f)
/*
* finite field multiplies by {02}, {04} and {08}
*/
#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b))
#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))
/*
* finite field multiplies required in table generation
*/
#define f3(x) (f2(x) ^ x)
#define f9(x) (f8(x) ^ x)
#define fb(x) (f8(x) ^ f2(x) ^ x)
#define fd(x) (f8(x) ^ f4(x) ^ x)
#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
#define etab_0(x) enc_tab+4(,x,8)
#define etab_1(x) enc_tab+3(,x,8)
#define etab_2(x) enc_tab+2(,x,8)
#define etab_3(x) enc_tab+1(,x,8)
#define etab_b(x) etab_3(x)
#define btab_0(x) enc_tab+6(,x,8)
#define btab_1(x) enc_tab+5(,x,8)
#define btab_2(x) enc_tab+4(,x,8)
#define btab_3(x) enc_tab+3(,x,8)
/*
* ROUND FUNCTION. Build column[2] on ESI and column[3] on EDI that have the
* round keys pre-loaded. Build column[0] in EBP and column[1] in EBX.
*
* Input:
*
* EAX column[0]
* EBX column[1]
* ECX column[2]
* EDX column[3]
* ESI column key[round][2]
* EDI column key[round][3]
* EBP scratch
*
* Output:
*
* EBP column[0] unkeyed
* EBX column[1] unkeyed
* ESI column[2] keyed
* EDI column[3] keyed
* EAX scratch
* ECX scratch
* EDX scratch
*/
#define rnd_fun(m1, m2) \
roll $16, %ebx ## m1 ## _zo(esi, cl, 0, ebp) m1(esi, bh, 3, ebp) m1(edi, ah, 1, ebp) ## m2 ## _zo(ebp, al, 0, ebp) shrl $16, %ebx orl %ebx, %eax \
m1(ebp, ah, 1, ebx) m2(ebx, dl, 2, ebx) ## m1 ## _zo(ebx, al, 0, edx) shrl $16, %eax \
m1(ebp, cl, 2, edx) m1(esi, al, 2, edx)
/*
* Basic MOV and XOR Operations for normal rounds
*/
#define nr_xor_zo nr_xor
#define nr_xor(r1, r2, r3, r4) \
movzbl %r2, %r4
#define nr_mov_zo nr_mov
#define nr_mov(r1, r2, r3, r4) \
movzbl %r2, %r4
/*
* Basic MOV and XOR Operations for last round
*/
#if 1
#define lr_xor_zo(r1, r2, r3, r4) \
movzbl %r2, %r4 xor %r4, %r1#define lr_xor(r1, r2, r3, r4) \
movzbl %r2, %r4 shll $(8*r3), %r4
#define lr_mov_zo(r1, r2, r3, r4) \
movzbl %r2, %r4
#define lr_mov(r1, r2, r3, r4) \
movzbl %r2, %r4 shll $(8*r3), %r1#else /* less effective but worth leaving as an option */
#define lr_xor_zo lr_xor
#define lr_xor(r1, r2, r3, r4) \
movzbl %r2, %r4 andl $(0x000000ff << 8 * r3), %r4
#define lr_mov_zo lr_mov
#define lr_mov(r1, r2, r3, r4) \
movzbl %r2, %r4 andl $(0x000000ff << 8 * r3), %r1#endif
/*
* Apply S-Box to the 4 bytes in a 32-bit word and rotate left 3 byte positions
*
* r1 : output is xored into this register
* r2 : input: a => eax, b => ebx, c => ecx, d => edx
* r3 : scratch register
*/
#define l3s_col(r1, r2, r3) \
lr_xor_zo(r1, ## r2 ## h, 0, r3) shrl $16, %e ## r2 ## x lr_xor(r1, ## r2 ## l, 1, r3)/*
* offsets to parameters
*/
#define in_blk 4 /* input byte array address parameter */
#define out_blk 8 /* output byte array address parameter */
#define ctx 12 /* AES context structure */
#define stk_spc 20 /* stack space */
#ifdef ENCRYPTION
#define ENCRYPTION_TABLE
#define enc_round \
addl $16, %ebp movl 8(%ebp), %esi \
rnd_fun(nr_xor, nr_mov) movl %ebp, %eax movl %edi, %edx xorl (%ebp), %eax
#define enc_last_round \
addl $16, %ebp movl 8(%ebp), %esi \
rnd_fun(lr_xor, lr_mov) movl %ebp, %eax xorl (%ebp), %eax
.section __TEXT, __text
/*
* AES Encryption Subroutine
*/
Entry(aes_encrypt)
subl $stk_spc, %esp
movl %ebp, 16(%esp)
movl %ebx, 12(%esp)
movl %esi, 8(%esp)
movl %edi, 4(%esp)
movl in_blk+stk_spc(%esp), %esi /* input pointer */
movl (%esi), %eax
movl 4(%esi), %ebx
movl 8(%esi), %ecx
movl 12(%esi), %edx
movl ctx+stk_spc(%esp), %ebp /* key pointer */
movzbl 4*KS_LENGTH(%ebp), %edi
xorl (%ebp), %eax
xorl 4(%ebp), %ebx
xorl 8(%ebp), %ecx
xorl 12(%ebp), %edx
/*
* determine the number of rounds
*/
cmpl $10*16, %edi
je aes_encrypt.3
cmpl $12*16, %edi
je aes_encrypt.2
cmpl $14*16, %edi
je aes_encrypt.1
movl $-1, %eax
jmp aes_encrypt.5
aes_encrypt.1:
enc_round
enc_round
aes_encrypt.2:
enc_round
enc_round
aes_encrypt.3:
enc_round
enc_round
enc_round
enc_round
enc_round
enc_round
enc_round
enc_round
enc_round
enc_last_round
movl out_blk+stk_spc(%esp), %edx
movl %eax, (%edx)
movl %ebx, 4(%edx)
movl %esi, 8(%edx)
movl %edi, 12(%edx)
xorl %eax, %eax
aes_encrypt.5:
movl 16(%esp), %ebp
movl 12(%esp), %ebx
movl 8(%esp), %esi
movl 4(%esp), %edi
addl $stk_spc, %esp
ret
#endif
/*
* For r2 == 16, or r2 == 24 && r1 == 7, or r2 ==32 && r1 == 6
*/
#define f_key(r1, r2, rc_val) \
l3s_col(esi, a, ebx) \
movl %esi, r1*r2(%ebp) movl %edi, r1*r2+4(%ebp) movl %ecx, r1*r2+8(%ebp) movl %edx, r1*r2+12(%ebp)
/*
* For r2 == 24 && r1 == 0 to 6
*/
#define f_key_24(r1, r2, rc_val) \
f_key(r1, r2, rc_val) xorl r1*r2+16-r2(%ebp), %eax xorl r1*r2+20-r2(%ebp), %eax
/*
* For r2 ==32 && r1 == 0 to 5
*/
#define f_key_32(r1, r2, rc_val) \
f_key(r1, r2, rc_val) roll $8, %eax movl r1*r2+16-r2(%ebp), %edx movl %edx, %eax movl %eax, r1*r2+16(%ebp) movl %eax, r1*r2+20(%ebp) movl %eax, r1*r2+24(%ebp) movl %eax, r1*r2+28(%ebp)#ifdef ENCRYPTION_KEY_SCHEDULE
#ifdef AES_128
#ifndef ENCRYPTION_TABLE
#define ENCRYPTION_TABLE
#endif
Entry(aes_encrypt_key128)
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 24(%esp), %ebp
movl $10*16, 4*KS_LENGTH(%ebp)
movl 20(%esp), %ebx
movl (%ebx), %esi
movl %esi, (%ebp)
movl 4(%ebx), %edi
movl %edi, 4(%ebp)
movl 8(%ebx), %ecx
movl %ecx, 8(%ebp)
movl 12(%ebx), %edx
movl %edx, 12(%ebp)
addl $16, %ebp
movl %edx, %eax
f_key(0, 16, 1)
f_key(1, 16, 2)
f_key(2, 16, 4)
f_key(3, 16, 8)
f_key(4, 16, 16)
f_key(5, 16, 32)
f_key(6, 16, 64)
f_key(7, 16, 128)
f_key(8, 16, 27)
f_key(9, 16, 54)
popl %edi
popl %esi
popl %ebx
popl %ebp
xorl %eax, %eax
ret
#endif
#ifdef AES_192
#ifndef ENCRYPTION_TABLE
#define ENCRYPTION_TABLE
#endif
Entry(aes_encrypt_key192)
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 24(%esp), %ebp
movl $12*16, 4*KS_LENGTH(%ebp)
movl 20(%esp), %ebx
movl (%ebx), %esi
movl %esi, (%ebp)
movl 4(%ebx), %edi
movl %edi, 4(%ebp)
movl 8(%ebx), %ecx
movl %ecx, 8(%ebp)
movl 12(%ebx), %edx
movl %edx, 12(%ebp)
movl 16(%ebx), %eax
movl %eax, 16(%ebp)
movl 20(%ebx), %eax
movl %eax, 20(%ebp)
addl $24, %ebp
f_key_24(0, 24, 1)
f_key_24(1, 24, 2)
f_key_24(2, 24, 4)
f_key_24(3, 24, 8)
f_key_24(4, 24, 16)
f_key_24(5, 24, 32)
f_key_24(6, 24, 64)
f_key(7, 24, 128)
popl %edi
popl %esi
popl %ebx
popl %ebp
xorl %eax, %eax
ret
#endif
#ifdef AES_256
#ifndef ENCRYPTION_TABLE
#define ENCRYPTION_TABLE
#endif
Entry(aes_encrypt_key256)
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 24(%esp), %ebp
movl $14*16, 4*KS_LENGTH(%ebp)
movl 20(%esp), %ebx
movl (%ebx), %esi
movl %esi, (%ebp)
movl 4(%ebx), %edi
movl %edi, 4(%ebp)
movl 8(%ebx), %ecx
movl %ecx, 8(%ebp)
movl 12(%ebx), %edx
movl %edx, 12(%ebp)
movl 16(%ebx), %eax
movl %eax, 16(%ebp)
movl 20(%ebx), %eax
movl %eax, 20(%ebp)
movl 24(%ebx), %eax
movl %eax, 24(%ebp)
movl 28(%ebx), %eax
movl %eax, 28(%ebp)
addl $32, %ebp
f_key_32(0, 32, 1)
f_key_32(1, 32, 2)
f_key_32(2, 32, 4)
f_key_32(3, 32, 8)
f_key_32(4, 32, 16)
f_key_32(5, 32, 32)
f_key(6, 32, 64)
popl %edi
popl %esi
popl %ebx
popl %ebp
xorl %eax, %eax
ret
#endif
#ifdef AES_VAR
#ifndef ENCRYPTION_TABLE
#define ENCRYPTION_TABLE
#endif
Entry(aes_encrypt_key)
movl 4(%esp), %ecx
movl 8(%esp), %eax
movl 12(%esp), %edx
pushl %edx
pushl %ecx
cmpl $16, %eax
je aes_encrypt_key.1
cmpl $128, %eax
je aes_encrypt_key.1
cmpl $24, %eax
je aes_encrypt_key.2
cmpl $192, %eax
je aes_encrypt_key.2
cmpl $32, %eax
je aes_encrypt_key.3
cmpl $256, %eax
je aes_encrypt_key.3
movl $-1, %eax
addl $8, %esp
ret
aes_encrypt_key.1:
do_call(aes_encrypt_key128, 8)
ret
aes_encrypt_key.2:
do_call(aes_encrypt_key192, 8)
ret
aes_encrypt_key.3:
do_call(aes_encrypt_key256, 8)
ret
#endif
#endif
#ifdef ENCRYPTION_TABLE
# S-box data - 256 entries
.section __DATA, __data
.align ALIGN
#define u8(x) 0, x, x, f3(x), f2(x), x, x, f3(x)
enc_tab:
.byte u8(0x63),u8(0x7c),u8(0x77),u8(0x7b),u8(0xf2),u8(0x6b),u8(0x6f),u8(0xc5)
.byte u8(0x30),u8(0x01),u8(0x67),u8(0x2b),u8(0xfe),u8(0xd7),u8(0xab),u8(0x76)
.byte u8(0xca),u8(0x82),u8(0xc9),u8(0x7d),u8(0xfa),u8(0x59),u8(0x47),u8(0xf0)
.byte u8(0xad),u8(0xd4),u8(0xa2),u8(0xaf),u8(0x9c),u8(0xa4),u8(0x72),u8(0xc0)
.byte u8(0xb7),u8(0xfd),u8(0x93),u8(0x26),u8(0x36),u8(0x3f),u8(0xf7),u8(0xcc)
.byte u8(0x34),u8(0xa5),u8(0xe5),u8(0xf1),u8(0x71),u8(0xd8),u8(0x31),u8(0x15)
.byte u8(0x04),u8(0xc7),u8(0x23),u8(0xc3),u8(0x18),u8(0x96),u8(0x05),u8(0x9a)
.byte u8(0x07),u8(0x12),u8(0x80),u8(0xe2),u8(0xeb),u8(0x27),u8(0xb2),u8(0x75)
.byte u8(0x09),u8(0x83),u8(0x2c),u8(0x1a),u8(0x1b),u8(0x6e),u8(0x5a),u8(0xa0)
.byte u8(0x52),u8(0x3b),u8(0xd6),u8(0xb3),u8(0x29),u8(0xe3),u8(0x2f),u8(0x84)
.byte u8(0x53),u8(0xd1),u8(0x00),u8(0xed),u8(0x20),u8(0xfc),u8(0xb1),u8(0x5b)
.byte u8(0x6a),u8(0xcb),u8(0xbe),u8(0x39),u8(0x4a),u8(0x4c),u8(0x58),u8(0xcf)
.byte u8(0xd0),u8(0xef),u8(0xaa),u8(0xfb),u8(0x43),u8(0x4d),u8(0x33),u8(0x85)
.byte u8(0x45),u8(0xf9),u8(0x02),u8(0x7f),u8(0x50),u8(0x3c),u8(0x9f),u8(0xa8)
.byte u8(0x51),u8(0xa3),u8(0x40),u8(0x8f),u8(0x92),u8(0x9d),u8(0x38),u8(0xf5)
.byte u8(0xbc),u8(0xb6),u8(0xda),u8(0x21),u8(0x10),u8(0xff),u8(0xf3),u8(0xd2)
.byte u8(0xcd),u8(0x0c),u8(0x13),u8(0xec),u8(0x5f),u8(0x97),u8(0x44),u8(0x17)
.byte u8(0xc4),u8(0xa7),u8(0x7e),u8(0x3d),u8(0x64),u8(0x5d),u8(0x19),u8(0x73)
.byte u8(0x60),u8(0x81),u8(0x4f),u8(0xdc),u8(0x22),u8(0x2a),u8(0x90),u8(0x88)
.byte u8(0x46),u8(0xee),u8(0xb8),u8(0x14),u8(0xde),u8(0x5e),u8(0x0b),u8(0xdb)
.byte u8(0xe0),u8(0x32),u8(0x3a),u8(0x0a),u8(0x49),u8(0x06),u8(0x24),u8(0x5c)
.byte u8(0xc2),u8(0xd3),u8(0xac),u8(0x62),u8(0x91),u8(0x95),u8(0xe4),u8(0x79)
.byte u8(0xe7),u8(0xc8),u8(0x37),u8(0x6d),u8(0x8d),u8(0xd5),u8(0x4e),u8(0xa9)
.byte u8(0x6c),u8(0x56),u8(0xf4),u8(0xea),u8(0x65),u8(0x7a),u8(0xae),u8(0x08)
.byte u8(0xba),u8(0x78),u8(0x25),u8(0x2e),u8(0x1c),u8(0xa6),u8(0xb4),u8(0xc6)
.byte u8(0xe8),u8(0xdd),u8(0x74),u8(0x1f),u8(0x4b),u8(0xbd),u8(0x8b),u8(0x8a)
.byte u8(0x70),u8(0x3e),u8(0xb5),u8(0x66),u8(0x48),u8(0x03),u8(0xf6),u8(0x0e)
.byte u8(0x61),u8(0x35),u8(0x57),u8(0xb9),u8(0x86),u8(0xc1),u8(0x1d),u8(0x9e)
.byte u8(0xe1),u8(0xf8),u8(0x98),u8(0x11),u8(0x69),u8(0xd9),u8(0x8e),u8(0x94)
.byte u8(0x9b),u8(0x1e),u8(0x87),u8(0xe9),u8(0xce),u8(0x55),u8(0x28),u8(0xdf)
.byte u8(0x8c),u8(0xa1),u8(0x89),u8(0x0d),u8(0xbf),u8(0xe6),u8(0x42),u8(0x68)
.byte u8(0x41),u8(0x99),u8(0x2d),u8(0x0f),u8(0xb0),u8(0x54),u8(0xbb),u8(0x16)
#endif
#ifdef DECRYPTION
#define DECRYPTION_TABLE
#define dtab_0(x) dec_tab(,x,8)
#define dtab_1(x) dec_tab+3(,x,8)
#define dtab_2(x) dec_tab+2(,x,8)
#define dtab_3(x) dec_tab+1(,x,8)
#define dtab_x(x) dec_tab+7(,x,8)
#define irn_fun(m1, m2) \
roll $16, %eax ## m1 ## _zo(esi, cl, 0, ebp) m1(esi, al, 2, ebp) m1(edi, ch, 1, ebp) ## m2 ## _zo(ebp, bl, 0, ebp) shrl $16, %eax orl %eax, %ebx \
m1(ebp, bh, 1, eax) m2(eax, cl, 2, ecx) m1(eax, dh, 1, ecx) shrl $16, %ebx \
m1(esi, dh, 3, ecx) m1(eax, bh, 3, ecx)
/*
* Basic MOV and XOR Operations for normal rounds
*/
#define ni_xor_zo ni_xor
#define ni_xor(r1, r2, r3, r4) \
movzbl %r2, %r4
#define ni_mov_zo ni_mov
#define ni_mov(r1, r2, r3, r4) \
movzbl %r2, %r4
/*
* Basic MOV and XOR Operations for last round
*/
#define li_xor_zo(r1, r2, r3, r4) \
movzbl %r2, %r4 xor %r4, %r1#define li_xor(r1, r2, r3, r4) \
movzbl %r2, %r4 shll $(8*r3), %r4
#define li_mov_zo(r1, r2, r3, r4) \
movzbl %r2, %r4
#define li_mov(r1, r2, r3, r4) \
movzbl %r2, %r4 shl $(8*r3), %r1#ifdef AES_REV_DKS
#define dec_round \
addl $16, %ebp movl 8(%ebp), %esi \
irn_fun(ni_xor, ni_mov) movl %ebp, %ebx movl %edi, %edx xorl (%ebp), %eax
#define dec_last_round \
addl $16, %ebp movl 8(%ebp), %esi \
irn_fun(li_xor, li_mov) movl %ebp, %ebx xorl (%ebp), %eax
#else
#define dec_round \
subl $16, %ebp movl 8(%ebp), %esi \
irn_fun(ni_xor, ni_mov) movl %ebp, %ebx movl %edi, %edx xorl (%ebp), %eax
#define dec_last_round \
subl $16, %ebp movl 8(%ebp), %esi \
irn_fun(li_xor, li_mov) movl %ebp, %ebx xorl (%ebp), %eax
#endif /* AES_REV_DKS */
.section __TEXT, __text
/*
* AES Decryption Subroutine
*/
Entry(aes_decrypt)
subl $stk_spc, %esp
movl %ebp, 16(%esp)
movl %ebx, 12(%esp)
movl %esi, 8(%esp)
movl %edi, 4(%esp)
/*
* input four columns and xor in first round key
*/
movl in_blk+stk_spc(%esp), %esi /* input pointer */
movl (%esi), %eax
movl 4(%esi), %ebx
movl 8(%esi), %ecx
movl 12(%esi), %edx
leal 16(%esi), %esi
movl ctx+stk_spc(%esp), %ebp /* key pointer */
movzbl 4*KS_LENGTH(%ebp), %edi
#ifndef AES_REV_DKS /* if decryption key schedule is not reversed */
leal (%ebp,%edi), %ebp /* we have to access it from the top down */
#endif
xorl (%ebp), %eax /* key schedule */
xorl 4(%ebp), %ebx
xorl 8(%ebp), %ecx
xorl 12(%ebp), %edx
/*
* determine the number of rounds
*/
cmpl $10*16, %edi
je aes_decrypt.3
cmpl $12*16, %edi
je aes_decrypt.2
cmpl $14*16, %edi
je aes_decrypt.1
movl $-1, %eax
jmp aes_decrypt.5
aes_decrypt.1:
dec_round
dec_round
aes_decrypt.2:
dec_round
dec_round
aes_decrypt.3:
dec_round
dec_round
dec_round
dec_round
dec_round
dec_round
dec_round
dec_round
dec_round
dec_last_round
/*
* move final values to the output array.
*/
movl out_blk+stk_spc(%esp), %ebp
movl %eax, (%ebp)
movl %ebx, 4(%ebp)
movl %esi, 8(%ebp)
movl %edi, 12(%ebp)
xorl %eax, %eax
aes_decrypt.5:
movl 16(%esp), %ebp
movl 12(%esp), %ebx
movl 8(%esp), %esi
movl 4(%esp), %edi
addl $stk_spc, %esp
ret
#endif
#define inv_mix_col \
movzbl %dl, %ebx movl dtab_0(%ebx), %eax shrl $16, %edx xorl dtab_1(%ebx), %eax movzbl etab_b(%ebx), %ebx movzbl %dh, %ebx xorl dtab_3(%ebx), %eax#ifdef DECRYPTION_KEY_SCHEDULE
#ifdef AES_128
#ifndef DECRYPTION_TABLE
#define DECRYPTION_TABLE
#endif
Entry(aes_decrypt_key128)
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 24(%esp), %eax /* context */
movl 20(%esp), %edx /* key */
pushl %eax
pushl %edx
do_call(aes_encrypt_key128, 8)
movl $10*16, %eax
movl 24(%esp), %esi /* pointer to first round key */
leal (%esi,%eax), %edi /* pointer to last round key */
addl $32, %esi
/* the inverse mix column transformation */
movl -16(%esi), %edx /* needs to be applied to all round keys */
inv_mix_col
movl %eax, -16(%esi) /* transforming the four sub-keys in the */
movl -12(%esi), %edx /* second round key */
inv_mix_col
movl %eax, -12(%esi) /* transformations for subsequent rounds */
movl -8(%esi), %edx /* can then be made more efficient by */
inv_mix_col
movl %eax, -8(%esi) /* in the encryption round key ek[r]: */
movl -4(%esi), %edx
inv_mix_col
movl %eax, -4(%esi) /* where n is 1..3. Hence the corresponding */
aes_decrypt_key128.0:
movl (%esi), %edx /* subkeys in the decryption round key dk[r] */
inv_mix_col
movl %eax, (%esi) /* GF(256): */
xorl -12(%esi), %eax
movl %eax, 4(%esi) /* dk[r][n] = dk[r][n-1] ^ dk[r-1][n] */
xorl -8(%esi), %eax
movl %eax, 8(%esi) /* So we only need one inverse mix column */
xorl -4(%esi), %eax /* operation (n = 0) for each four word cycle */
movl %eax, 12(%esi) /* in the expanded key. */
addl $16, %esi
cmpl %esi, %edi
jg aes_decrypt_key128.0
jmp dec_end
#endif
#ifdef AES_192
#ifndef DECRYPTION_TABLE
#define DECRYPTION_TABLE
#endif
Entry(aes_decrypt_key192)
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 24(%esp), %eax /* context */
movl 20(%esp), %edx /* key */
pushl %eax
pushl %edx
do_call(aes_encrypt_key192, 8)
movl $12*16, %eax
movl 24(%esp), %esi /* first round key */
leal (%esi,%eax), %edi /* last round key */
addl $48, %esi /* the first 6 words are the key, of */
/* which the top 2 words are part of */
movl -32(%esi), %edx /* the second round key and hence */
inv_mix_col
movl %eax, -32(%esi) /* need to do a further six values prior */
movl -28(%esi), %edx /* to using a more efficient technique */
inv_mix_col
movl %eax, -28(%esi)
/* dk[r][n] = dk[r][n-1] ^ dk[r-1][n] */
movl -24(%esi), %edx
inv_mix_col
movl %eax, -24(%esi) /* cycle is now 6 words long */
movl -20(%esi), %edx
inv_mix_col
movl %eax, -20(%esi)
movl -16(%esi), %edx
inv_mix_col
movl %eax, -16(%esi)
movl -12(%esi), %edx
inv_mix_col
movl %eax, -12(%esi)
movl -8(%esi), %edx
inv_mix_col
movl %eax, -8(%esi)
movl -4(%esi), %edx
inv_mix_col
movl %eax, -4(%esi)
aes_decrypt_key192.0:
movl (%esi), %edx /* expanded key is 13 * 4 = 44 32-bit words */
inv_mix_col
movl %eax, (%esi) /* using inv_mix_col. We have already done 8 */
xorl -20(%esi), %eax /* of these so 36 are left - hence we need */
movl %eax, 4(%esi) /* exactly 6 loops of six here */
xorl -16(%esi), %eax
movl %eax, 8(%esi)
xorl -12(%esi), %eax
movl %eax, 12(%esi)
xorl -8(%esi), %eax
movl %eax, 16(%esi)
xorl -4(%esi), %eax
movl %eax, 20(%esi)
addl $24, %esi
cmpl %esi, %edi
jg aes_decrypt_key192.0
jmp dec_end
#endif
#ifdef AES_256
#ifndef DECRYPTION_TABLE
#define DECRYPTION_TABLE
#endif
Entry(aes_decrypt_key256)
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 24(%esp), %eax
movl 20(%esp), %edx
pushl %eax
pushl %edx
do_call(aes_encrypt_key256, 8)
movl $14*16, %eax
movl 24(%esp), %esi
leal (%esi,%eax), %edi
addl $64, %esi
movl -48(%esi), %edx /* the primary key is 8 words, of which */
inv_mix_col
movl %eax, -48(%esi)
movl -44(%esi), %edx
inv_mix_col
movl %eax, -44(%esi)
movl -40(%esi), %edx
inv_mix_col
movl %eax, -40(%esi)
movl -36(%esi), %edx
inv_mix_col
movl %eax, -36(%esi)
movl -32(%esi), %edx /* the encryption key expansion cycle is */
inv_mix_col
movl %eax, -32(%esi) /* start by doing one complete block */
movl -28(%esi), %edx
inv_mix_col
movl %eax, -28(%esi)
movl -24(%esi), %edx
inv_mix_col
movl %eax, -24(%esi)
movl -20(%esi), %edx
inv_mix_col
movl %eax, -20(%esi)
movl -16(%esi), %edx
inv_mix_col
movl %eax, -16(%esi)
movl -12(%esi), %edx
inv_mix_col
movl %eax, -12(%esi)
movl -8(%esi), %edx
inv_mix_col
movl %eax, -8(%esi)
movl -4(%esi), %edx
inv_mix_col
movl %eax, -4(%esi)
aes_decrypt_key256.0:
movl (%esi), %edx /* we can now speed up the remaining */
inv_mix_col
movl %eax, (%esi) /* outlined earlier. But note that */
xorl -28(%esi), %eax /* there is one extra inverse mix */
movl %eax, 4(%esi) /* column operation as the 256 bit */
xorl -24(%esi), %eax /* key has an extra non-linear step */
movl %eax, 8(%esi) /* for the midway element. */
xorl -20(%esi), %eax
movl %eax, 12(%esi) /* the expanded key is 15 * 4 = 60 */
movl 16(%esi), %edx /* 32-bit words of which 52 need to */
inv_mix_col
movl %eax, 16(%esi) /* 12 so 40 are left - which means */
xorl -12(%esi), %eax /* that we need exactly 5 loops of 8 */
movl %eax, 20(%esi)
xorl -8(%esi), %eax
movl %eax, 24(%esi)
xorl -4(%esi), %eax
movl %eax, 28(%esi)
addl $32, %esi
cmpl %esi, %edi
jg aes_decrypt_key256.0
#endif
dec_end:
#ifdef AES_REV_DKS
movl 24(%esp), %esi /* this reverses the order of the */
dec_end.1:
movl (%esi), %eax /* round keys if required */
movl 4(%esi), %ebx
movl (%edi), %ebp
movl 4(%edi), %edx
movl %ebp, (%esi)
movl %edx, 4(%esi)
movl %eax, (%edi)
movl %ebx, 4(%edi)
movl 8(%esi), %eax
movl 12(%esi), %ebx
movl 8(%edi), %ebp
movl 12(%edi), %edx
movl %ebp, 8(%esi)
movl %edx, 12(%esi)
movl %eax, 8(%edi)
movl %ebx, 12(%edi)
addl $16, %esi
subl $16, %edi
cmpl %esi, %edi
jg dec_end.1
#endif
popl %edi
popl %esi
popl %ebx
popl %ebp
xorl %eax, %eax
ret
#ifdef AES_VAR
Entry(aes_decrypt_key)
movl 4(%esp), %ecx
movl 8(%esp), %eax
movl 12(%esp), %edx
pushl %edx
pushl %ecx
cmpl $16, %eax
je aes_decrypt_key.1
cmpl $128, %eax
je aes_decrypt_key.1
cmpl $24, %eax
je aes_decrypt_key.2
cmpl $192, %eax
je aes_decrypt_key.2
cmpl $32, %eax
je aes_decrypt_key.3
cmpl $256, %eax
je aes_decrypt_key.3
movl $-1, %eax
addl $8, %esp
ret
aes_decrypt_key.1:
do_call(aes_decrypt_key128, 8)
ret
aes_decrypt_key.2:
do_call(aes_decrypt_key192, 8)
ret
aes_decrypt_key.3:
do_call(aes_decrypt_key256, 8)
ret
#endif
#endif
#ifdef DECRYPTION_TABLE
/*
* Inverse S-box data - 256 entries
*/
.section __DATA, __data
.align ALIGN
#define v8(x) fe(x), f9(x), fd(x), fb(x), fe(x), f9(x), fd(x), x
dec_tab:
.byte v8(0x52),v8(0x09),v8(0x6a),v8(0xd5),v8(0x30),v8(0x36),v8(0xa5),v8(0x38)
.byte v8(0xbf),v8(0x40),v8(0xa3),v8(0x9e),v8(0x81),v8(0xf3),v8(0xd7),v8(0xfb)
.byte v8(0x7c),v8(0xe3),v8(0x39),v8(0x82),v8(0x9b),v8(0x2f),v8(0xff),v8(0x87)
.byte v8(0x34),v8(0x8e),v8(0x43),v8(0x44),v8(0xc4),v8(0xde),v8(0xe9),v8(0xcb)
.byte v8(0x54),v8(0x7b),v8(0x94),v8(0x32),v8(0xa6),v8(0xc2),v8(0x23),v8(0x3d)
.byte v8(0xee),v8(0x4c),v8(0x95),v8(0x0b),v8(0x42),v8(0xfa),v8(0xc3),v8(0x4e)
.byte v8(0x08),v8(0x2e),v8(0xa1),v8(0x66),v8(0x28),v8(0xd9),v8(0x24),v8(0xb2)
.byte v8(0x76),v8(0x5b),v8(0xa2),v8(0x49),v8(0x6d),v8(0x8b),v8(0xd1),v8(0x25)
.byte v8(0x72),v8(0xf8),v8(0xf6),v8(0x64),v8(0x86),v8(0x68),v8(0x98),v8(0x16)
.byte v8(0xd4),v8(0xa4),v8(0x5c),v8(0xcc),v8(0x5d),v8(0x65),v8(0xb6),v8(0x92)
.byte v8(0x6c),v8(0x70),v8(0x48),v8(0x50),v8(0xfd),v8(0xed),v8(0xb9),v8(0xda)
.byte v8(0x5e),v8(0x15),v8(0x46),v8(0x57),v8(0xa7),v8(0x8d),v8(0x9d),v8(0x84)
.byte v8(0x90),v8(0xd8),v8(0xab),v8(0x00),v8(0x8c),v8(0xbc),v8(0xd3),v8(0x0a)
.byte v8(0xf7),v8(0xe4),v8(0x58),v8(0x05),v8(0xb8),v8(0xb3),v8(0x45),v8(0x06)
.byte v8(0xd0),v8(0x2c),v8(0x1e),v8(0x8f),v8(0xca),v8(0x3f),v8(0x0f),v8(0x02)
.byte v8(0xc1),v8(0xaf),v8(0xbd),v8(0x03),v8(0x01),v8(0x13),v8(0x8a),v8(0x6b)
.byte v8(0x3a),v8(0x91),v8(0x11),v8(0x41),v8(0x4f),v8(0x67),v8(0xdc),v8(0xea)
.byte v8(0x97),v8(0xf2),v8(0xcf),v8(0xce),v8(0xf0),v8(0xb4),v8(0xe6),v8(0x73)
.byte v8(0x96),v8(0xac),v8(0x74),v8(0x22),v8(0xe7),v8(0xad),v8(0x35),v8(0x85)
.byte v8(0xe2),v8(0xf9),v8(0x37),v8(0xe8),v8(0x1c),v8(0x75),v8(0xdf),v8(0x6e)
.byte v8(0x47),v8(0xf1),v8(0x1a),v8(0x71),v8(0x1d),v8(0x29),v8(0xc5),v8(0x89)
.byte v8(0x6f),v8(0xb7),v8(0x62),v8(0x0e),v8(0xaa),v8(0x18),v8(0xbe),v8(0x1b)
.byte v8(0xfc),v8(0x56),v8(0x3e),v8(0x4b),v8(0xc6),v8(0xd2),v8(0x79),v8(0x20)
.byte v8(0x9a),v8(0xdb),v8(0xc0),v8(0xfe),v8(0x78),v8(0xcd),v8(0x5a),v8(0xf4)
.byte v8(0x1f),v8(0xdd),v8(0xa8),v8(0x33),v8(0x88),v8(0x07),v8(0xc7),v8(0x31)
.byte v8(0xb1),v8(0x12),v8(0x10),v8(0x59),v8(0x27),v8(0x80),v8(0xec),v8(0x5f)
.byte v8(0x60),v8(0x51),v8(0x7f),v8(0xa9),v8(0x19),v8(0xb5),v8(0x4a),v8(0x0d)
.byte v8(0x2d),v8(0xe5),v8(0x7a),v8(0x9f),v8(0x93),v8(0xc9),v8(0x9c),v8(0xef)
.byte v8(0xa0),v8(0xe0),v8(0x3b),v8(0x4d),v8(0xae),v8(0x2a),v8(0xf5),v8(0xb0)
.byte v8(0xc8),v8(0xeb),v8(0xbb),v8(0x3c),v8(0x83),v8(0x53),v8(0x99),v8(0x61)
.byte v8(0x17),v8(0x2b),v8(0x04),v8(0x7e),v8(0xba),v8(0x77),v8(0xd6),v8(0x26)
.byte v8(0xe1),v8(0x69),v8(0x14),v8(0x63),v8(0x55),v8(0x21),v8(0x0c),v8(0x7d)
#endif