vRijndael-alg-ref.c [plain text]
#include "rijndaelApi.h"
#include "rijndael-alg-ref.h"
#include "boxes-ref.h"
#include <string.h>
#define VAES_DEBUG 1
#if VAES_DEBUG
#include <stdio.h>
#define vdprintf(s) printf s
#else
#define vdprintf(s)
#endif
#define SC ((BC - 4) >> 1)
#if defined(__ppc__) && defined(ALTIVEC_ENABLE)
typedef union {
unsigned char s[4][8];
unsigned long l[8];
vector unsigned char v[2];
} doubleVec;
typedef union {
unsigned long s[4];
vector unsigned long v;
} vecLong;
static word8 shifts[3][4][2] = {
{ { 0, 0 },
{ 1, 3 },
{ 2, 2 },
{ 3, 1 }
},
{ { 0, 0 },
{ 1, 5 },
{ 2, 4 },
{ 3, 3 }
},
{ { 0, 0 },
{ 1, 7 },
{ 3, 5 },
{ 4, 4 }
}
};
int vRijndaelKeySched ( vector unsigned char vk[2], int keyBits, int blockBits,
unsigned char W[MAXROUNDS+1][4][MAXBC])
{
int KC, BC, ROUNDS;
int i, j, t, rconpointer = 0;
doubleVec tk;
register vector unsigned char v1, v2, mask;
switch (keyBits) {
case 128: KC = 4; break;
case 192: KC = 6; break;
case 256: KC = 8; break;
default : return (-1);
}
switch (blockBits) {
case 128: BC = 4; break;
case 192: BC = 6; break;
case 256: BC = 8; break;
default : return (-2);
}
switch (keyBits >= blockBits ? keyBits : blockBits) {
case 128: ROUNDS = 10; break;
case 192: ROUNDS = 12; break;
case 256: ROUNDS = 14; break;
default : return (-3);
}
tk.v[0] = vk[0];
tk.v[1] = vk[1];
t = 0;
for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk.s[i][j];
while (t < (ROUNDS+1)*BC) {
for(i = 0; i < 4; i++)
tk.s[i][0] ^= *((word8 *)S + tk.s[(i+1)%4][KC-1]);
tk.s[0][0] ^= rcon[rconpointer++];
if (KC != 8) {
mask = (vector unsigned char) ( 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff );
for ( i = 0; i < 2; i++ ) {
v1 = vec_sld( tk.v[i], tk.v[i], 15 );
v2 = vec_and( v1, mask );
tk.v[i] = vec_xor( tk.v[i], v2 );
}
}
else {
mask = (vector unsigned char) ( 0, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0, 0, 0, 0 );
for ( i = 0; i < 2; i++ ) {
v1 = vec_sld( tk.v[i], tk.v[i], 15 );
v2 = vec_and( v1, mask );
tk.v[i] = vec_xor( tk.v[i], v2 );
for(j = 0; j < 4; j++) tk.s[i][KC/2] ^= *((word8 *)S + tk.s[i][KC/2 - 1]);
mask = vec_sld( mask, mask, 4 );
v2 = vec_and( v1, mask );
tk.v[i] = vec_xor( tk.v[i], v2 );
mask = vec_sld( mask, mask, 4 );
}
}
for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk.s[i][j];
}
return 0;
}
void vMakeKey(BYTE *keyMaterial, keyInstance *key)
{
register vector unsigned char v1, v2, v3, mask;
vector unsigned char vk[2];
v1 = vec_ld( 0, (vector unsigned char *) keyMaterial );
v2 = vec_ld( 16, (vector unsigned char *) keyMaterial );
if ( (long) keyMaterial & 0x0fL )
{ v3 = vec_ld( 32, (vector unsigned char *) keyMaterial );
mask = vec_lvsl( 0, keyMaterial );
v1 = vec_perm( v1, v2, mask );
v2 = vec_perm( v2, v3, mask );
}
vk[0] = vec_perm( v1, v2, (vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29 ) );
vk[1] = vec_perm( v1, v2, (vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31 ) );
vRijndaelKeySched (vk, key->keyLen, key->blockLen, key->keySched);
memset( (char *) vk, 0, 4 * MAXKC);
}
vector unsigned char rimskyKorsakov ( vector unsigned char v, vector unsigned char * table )
{
register vector unsigned char upperBits000, upperBits001, upperBits010, upperBits011,
upperBits100, upperBits101, upperBits110, upperBits111,
lookupBit00, lookupBit01, lookupBit10, lookupBit11,
lookupBit0, lookupBit1, lookup,
maskForBit6, maskForBit7, maskForBit8, seven;
register vector unsigned char *tabeven, *tabodd;
seven = vec_splat_u8 ( 7 );
tabeven = table++;
tabodd = table;
upperBits000 = vec_perm ( *tabeven, *tabodd, v );
tabeven += 2; tabodd += 2;
upperBits001 = vec_perm ( *tabeven, *tabodd, v );
tabeven += 2; tabodd += 2;
upperBits010 = vec_perm ( *tabeven, *tabodd, v );
tabeven += 2; tabodd += 2;
upperBits011 = vec_perm ( *tabeven, *tabodd, v );
tabeven += 2; tabodd += 2;
upperBits100 = vec_perm ( *tabeven, *tabodd, v );
tabeven += 2; tabodd += 2;
upperBits101 = vec_perm ( *tabeven, *tabodd, v );
tabeven += 2; tabodd += 2;
upperBits110 = vec_perm ( *tabeven, *tabodd, v );
tabeven += 2; tabodd += 2;
upperBits111 = vec_perm ( *tabeven, *tabodd, v );
maskForBit6 = vec_sl ( v, vec_splat_u8 ( 2 ) );
maskForBit6 = vec_sra ( maskForBit6, seven );
lookupBit00 = vec_sel ( upperBits000, upperBits001, maskForBit6 );
lookupBit01 = vec_sel ( upperBits010, upperBits011, maskForBit6 );
lookupBit10 = vec_sel ( upperBits100, upperBits101, maskForBit6 );
lookupBit11 = vec_sel ( upperBits110, upperBits111, maskForBit6 );
maskForBit7 = vec_sl ( v, vec_splat_u8 ( 1 ) );
maskForBit7 = vec_sra ( maskForBit7, seven );
lookupBit0 = vec_sel ( lookupBit00, lookupBit01, maskForBit7 );
lookupBit1 = vec_sel ( lookupBit10, lookupBit11, maskForBit7 );
maskForBit8 = vec_sra ( v, seven );
lookup = vec_sel ( lookupBit0, lookupBit1, maskForBit8 );
return lookup;
}
vector unsigned char vmul(vector unsigned char a, vector unsigned char b)
{
register vector unsigned char x, y, zero;
register vector unsigned short xh, yh, zhi, zlo, two54, two55;
zero = vec_splat_u8( 0 );
two55 = vec_splat_u16( -1 );
two55 = (vector unsigned short) vec_mergeh( zero, (vector unsigned char) two55 );
two54 = vec_sub( two55, vec_splat_u16( 1 ) );
x = rimskyKorsakov( a, (vector unsigned char *)Logtable ); y = rimskyKorsakov( b, (vector unsigned char *)Logtable );
xh = (vector unsigned short) vec_mergeh( zero, x );
yh = (vector unsigned short) vec_mergeh( zero, y );
xh = vec_add( xh, yh ); yh = vec_sub( xh, two55 );
zhi = vec_sel( xh, yh, vec_cmpgt( xh, two54 ) );
xh = (vector unsigned short) vec_mergel( zero, x );
yh = (vector unsigned short) vec_mergel( zero, y );
xh = vec_add( xh, yh );
yh = vec_sub( xh, two55 );
zlo = vec_sel( xh, yh, vec_cmpgt( xh, two54 ) );
x = vec_pack( zhi, zlo ); x = rimskyKorsakov( x, (vector unsigned char *)Alogtable ); x = vec_sel( x, zero, vec_cmpeq( a, zero ) ); x = vec_sel( x, zero, vec_cmpeq( b, zero ) ); return x;
}
void vKeyAddition(vector unsigned char v[2], vector unsigned char rk[2])
{
v[0] = vec_xor( v[0], rk[0] ); v[1] = vec_xor( v[1], rk[1] ); }
void vShiftRow(vector unsigned char v[2], word8 d, word8 BC)
{
vecLong sh;
register vector unsigned char mask, mask1, t;
register vector bool char c;
register int i, j;
sh.s[0] = 0;
for (i = 1; i < 4; i++)
sh.s[i] = shifts[SC][i][d] % BC;
j = 0;
for ( i = 0; i < 2; i++ ) {
mask = vec_lvsl( 0, (int *) sh.s[j++]); mask1 = vec_lvsl( 0, (int *) sh.s[j++]); if (BC == 4) {
mask = vec_sld( mask, mask1, 8 ); mask = vec_and( mask, vec_splat_u8( 3 ) );
} else if (BC == 6) {
mask = vec_sld( mask, mask, 8 );
mask = vec_sld( mask, mask1, 8 ); t = vec_sub( mask, vec_splat_u8( 6 ) );
c = vec_cmpgt( mask, vec_splat_u8( 5 ) );
mask = vec_sel( mask, t, c );
} else {
mask = vec_sld( mask, mask1, 8 ); mask = vec_and( mask, vec_splat_u8( 7 ) );
}
mask1 = vec_sld( vec_splat_u8( 0 ), vec_splat_u8( 8 ), 8 );
mask = vec_add( mask, mask1 );
v[i] = vec_perm( v[i], v[i], mask ); }
}
void vSubstitution( vector unsigned char v[2], vector unsigned char box[16] )
{
v[0] = rimskyKorsakov( v[0], box ); v[1] = rimskyKorsakov( v[1], box ); }
void vMixColumn(vector unsigned char v[2])
{
register vector unsigned char a0, a1, a2, a3, b0, b1, b2, b3;
register vector unsigned char two, three;
two = vec_splat_u8( 2 );
three = vec_splat_u8( 3 );
a1 = vec_sld( v[0], v[1], 8 ); b1 = vec_sld( v[1], v[0], 8 );
a2 = vec_sld( a1, b1, 8 ); b2 = vec_sld( b1, a1, 8 );
a3 = vec_sld( a2, b2, 8 ); b3 = vec_sld( b2, a2, 8 );
a0 = vmul( two, v[0] ); a0 = vec_xor( a0, vmul( three, a1 ) ); a0 = vec_xor( a0, a2 ); v[0] = vec_xor( a0, a3 );
b0 = vmul( two, v[1] );
b0 = vec_xor( b0, vmul( three, b1 ) );
b0 = vec_xor( b0, b2 );
v[1] = vec_xor( b0, b3 );
}
void vInvMixColumn(vector unsigned char v[2])
{
register vector unsigned char a0, a1, a2, a3, b0, b1, b2, b3;
register vector unsigned char nine, eleven, thirteen, fourteen;;
nine = vec_splat_u8( 0x9 );
eleven = vec_splat_u8( 0xb );
thirteen = vec_splat_u8( 0xd );
fourteen = vec_splat_u8( 0xe );
a1 = vec_sld( v[0], v[1], 8 ); b1 = vec_sld( v[1], v[0], 8 );
a2 = vec_sld( a1, b1, 8 ); b2 = vec_sld( b1, a1, 8 );
a3 = vec_sld( a2, b2, 8 ); b3 = vec_sld( b2, a2, 8 );
a0 = vmul( fourteen, v[0] ); a0 = vec_xor( a0, vmul( eleven, a1 ) ); a0 = vec_xor( a0, vmul( thirteen, a2 ) ); v[0] = vec_xor( a0, vmul( nine, a3 ) );
b0 = vmul( fourteen, v[1] );
b0 = vec_xor( b0, vmul( eleven, b1 ) );
b0 = vec_xor( b0, vmul( thirteen, b2 ) );
v[1] = vec_xor( b0, vmul( nine, b3 ) );
}
int vRijndaelEncrypt (vector unsigned char a[2], int keyBits, int blockBits, vector unsigned char rk[MAXROUNDS+1][2])
{
int r, BC, ROUNDS;
switch (blockBits) {
case 128: BC = 4; break;
case 192: BC = 6; break;
case 256: BC = 8; break;
default : return (-2);
}
switch (keyBits >= blockBits ? keyBits : blockBits) {
case 128: ROUNDS = 10; break;
case 192: ROUNDS = 12; break;
case 256: ROUNDS = 14; break;
default : return (-3);
}
vKeyAddition( a, rk[0] );
for(r = 1; r < ROUNDS; r++) {
vSubstitution( a, (vector unsigned char *)S);
vShiftRow( a, 0, BC);
vMixColumn( a );
vKeyAddition( a, rk[r] );
}
vSubstitution( a, (vector unsigned char *)S);
vShiftRow( a, 0, BC);
vKeyAddition( a, rk[ROUNDS] );
return 0;
}
int vRijndaelDecrypt (vector unsigned char a[2], int keyBits, int blockBits, vector unsigned char rk[MAXROUNDS+1][2])
{
int r, BC, ROUNDS;
switch (blockBits) {
case 128: BC = 4; break;
case 192: BC = 6; break;
case 256: BC = 8; break;
default : return (-2);
}
switch (keyBits >= blockBits ? keyBits : blockBits) {
case 128: ROUNDS = 10; break;
case 192: ROUNDS = 12; break;
case 256: ROUNDS = 14; break;
default : return (-3);
}
vKeyAddition( a, rk[ROUNDS] );
vSubstitution( a, (vector unsigned char *)Si);
vShiftRow( a, 1, BC);
for(r = ROUNDS-1; r > 0; r--) {
vKeyAddition( a, rk[r] );
vInvMixColumn( a );
vSubstitution( a, (vector unsigned char *)Si);
vShiftRow( a, 1, BC);
}
vKeyAddition( a, rk[0] );
return 0;
}
#if 0
void vBlockEncrypt(cipherInstance *cipher, keyInstance *key, BYTE *input, int inputLen, BYTE *outBuffer)
{
register vector unsigned char v1, v2, v3, v4, mask;
register vector bool char cmp;
v1 = vec_ld( 0, (vector unsigned char *) input );
v2 = vec_ld( 16, (vector unsigned char *) input );
if ( (long) input & 0x0fL )
{ v3 = vec_ld( 32, (vector unsigned char *) input );
mask = vec_lvsl( 0, input );
v1 = vec_perm( v1, v2, mask );
v2 = vec_perm( v2, v3, mask );
}
v3 = vec_perm( v1, v2, (vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29 ) );
v4 = vec_perm( v1, v2, (vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31 ) );
if (cipher->mode == MODE_CBC) {
v3 = vec_xor( v3, *((vector unsigned char *) cipher->chainBlock ) );
v4 = vec_xor( v4, *((vector unsigned char *) cipher->chainBlock + 1 ) );
}
vec_st( v3, 0, (vector unsigned char *) cipher->chainBlock );
vec_st( v4, 16, (vector unsigned char *) cipher->chainBlock );
vRijndaelEncrypt((vector unsigned char *) cipher->chainBlock, key->keyLen, cipher->blockLen, (vector unsigned char *) key->keySched);
v1 = vec_ld( 0, (vector unsigned char *) cipher->chainBlock );
v2 = vec_ld( 16, (vector unsigned char *) cipher->chainBlock );
v3 = vec_perm( v1, v2, (vector unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27 ) );
v4 = vec_perm( v1, v2, (vector unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31 ) );
if ( (long) outBuffer & 0x0fL )
{
mask = vec_lvsr( 0, outBuffer );
cmp = vec_cmpgt( mask, vec_splat_u8( 0x0f ) );
v1 = vec_perm( v3, v3, mask );
v2 = vec_perm( v4, v4, mask );
v3 = vec_ld( 0, (vector unsigned char *) outBuffer );
v4 = vec_sel( v3, v1, cmp );
vec_st( v4, 0, (vector unsigned char *) outBuffer );
v1 = vec_sel( v1, v2, cmp );
vec_st( v1, 16, (vector unsigned char *) outBuffer );
v3 = vec_ld( 32, (vector unsigned char *) outBuffer );
v2 = vec_sel( v2, v3, cmp );
vec_st( v2, 32, (vector unsigned char *) outBuffer );
} else {
vec_st( v3, 0, (vector unsigned char *) outBuffer );
vec_st( v4, 16, (vector unsigned char *) outBuffer );
}
return;
}
void vBlockDecrypt(cipherInstance *cipher, keyInstance *key, BYTE *input, int inputLen, BYTE *outBuffer)
{
register vector unsigned char v1, v2, v3, v4, mask;
register vector bool char cmp;
vector unsigned char block[2], cblock[2];
v1 = vec_ld( 0, (vector unsigned char *) input );
v2 = vec_ld( 16, (vector unsigned char *) input );
if ( (long) input & 0x0fL )
{ v3 = vec_ld( 32, (vector unsigned char *) input );
mask = vec_lvsl( 0, input );
v1 = vec_perm( v1, v2, mask );
v2 = vec_perm( v2, v3, mask );
}
v3 = vec_perm( v1, v2, (vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29 ) );
v4 = vec_perm( v1, v2, (vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31 ) );
block[0] = v3;
block[1] = v4;
if (cipher->mode == MODE_CBC) {
cblock[0] = v3;
cblock[1] = v4;
}
vRijndaelDecrypt ((vector unsigned char *) block, key->keyLen, cipher->blockLen, (vector unsigned char *) key->keySched);
v1 = block[0];
v2 = block[1];
if (cipher->mode == MODE_CBC) {
v1 = vec_xor( v1, *((vector unsigned char *) cipher->chainBlock) );
v2 = vec_xor( v2, *((vector unsigned char *) cipher->chainBlock + 1) );
vec_st( cblock[0], 0, (vector unsigned char *) cipher->chainBlock );
vec_st( cblock[1], 16, (vector unsigned char *) cipher->chainBlock );
}
v3 = vec_perm( v1, v2, (vector unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27 ) );
v4 = vec_perm( v1, v2, (vector unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31 ) );
if ( (long) outBuffer & 0x0fL )
{
mask = vec_lvsr( 0, outBuffer );
cmp = vec_cmpgt( mask, vec_splat_u8( 0x0f ) );
v1 = vec_perm( v3, v3, mask );
v2 = vec_perm( v4, v4, mask );
v3 = vec_ld( 0, (vector unsigned char *) outBuffer );
v4 = vec_sel( v3, v1, cmp );
vec_st( v4, 0, (vector unsigned char *) outBuffer );
v1 = vec_sel( v1, v2, cmp );
vec_st( v1, 16, (vector unsigned char *) outBuffer );
v3 = vec_ld( 32, (vector unsigned char *) outBuffer );
v2 = vec_sel( v2, v3, cmp );
vec_st( v2, 32, (vector unsigned char *) outBuffer );
} else {
vec_st( v3, 0, (vector unsigned char *) outBuffer );
vec_st( v4, 16, (vector unsigned char *) outBuffer );
}
}
#endif
void vBlockEncrypt128(
keyInstance *key,
BYTE *input,
BYTE *outBuffer)
{
vector unsigned char block[2];
register vector unsigned char v1, v2;
if ( (long) input & 0x0fL ) {
BYTE localBuf[16];
vdprintf(("vBlockEncrypt128: unaligned input\n"));
if((unsigned)localBuf & 0xf) {
vdprintf(("vBlockEncrypt128: unaligned localBuf!\n"));
}
memmove(localBuf, input, 16);
v1 = vec_ld(0, (vector unsigned char *)localBuf);
}
else {
vdprintf(("vBlockEncrypt128: aligned input\n"));
v1 = vec_ld( 0, (vector unsigned char *) input );
}
block[0] = vec_perm(v1, v2,
(vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1,
5, 9, 13, 17, 21, 25, 29 ) );
block[1] = vec_perm( v1, v2,
(vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3,
7, 11, 15, 19, 23, 27, 31 ) );
vRijndaelEncrypt(block, key->keyLen, 128, (vector unsigned char *) key->keySched);
v1 = vec_perm(block[0], block[1],
(vector unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2,
10, 18, 26, 3, 11, 19, 27 ) );
v2 = vec_perm(block[0], block[1],
(vector unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6,
14, 22, 30, 7, 15, 23, 31 ) );
if ( (long) outBuffer & 0x0fL )
{
BYTE localBuf[16];
vec_st(v1, 0, (vector unsigned char *) localBuf );
memmove(outBuffer, localBuf, 16);
} else {
vec_st( v1, 0, (vector unsigned char *) outBuffer );
}
return;
}
void vBlockDecrypt128(
keyInstance *key,
BYTE *input,
BYTE *outBuffer)
{
vector unsigned char block[2];
register vector unsigned char v1, v2;
if ( (long) input & 0x0fL ) {
BYTE localBuf[16];
vdprintf(("vBlockDecrypt128: unaligned input\n"));
if((unsigned)localBuf & 0xf) {
vdprintf(("vBlockDecrypt128: unaligned localBuf!\n"));
}
memmove(localBuf, input, 16);
v1 = vec_ld(0, (vector unsigned char *)localBuf);
}
else {
vdprintf(("vBlockDecrypt128: aligned input\n"));
v1 = vec_ld( 0, (vector unsigned char *) input );
}
block[0] = vec_perm(v1, v2,
(vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1,
5, 9, 13, 17, 21, 25, 29 ) );
block[1] = vec_perm( v1, v2,
(vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3,
7, 11, 15, 19, 23, 27, 31 ) );
vRijndaelDecrypt(block, key->keyLen, 128, (vector unsigned char *) key->keySched);
v1 = vec_perm(block[0], block[1],
(vector unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2,
10, 18, 26, 3, 11, 19, 27 ) );
v2 = vec_perm(block[0], block[1],
(vector unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6,
14, 22, 30, 7, 15, 23, 31 ) );
if ( (long) outBuffer & 0x0fL ) {
BYTE localBuf[16];
vec_st(v1, 0, (vector unsigned char *) localBuf );
memmove(outBuffer, localBuf, 16);
} else {
vec_st( v1, 0, (vector unsigned char *) outBuffer );
}
return;
}
#endif