#include "aesopt.h"
#include "aestab.h"
#include <stddef.h>
#if defined(__cplusplus)
extern "C"
{
#endif
/*
 * Per-word and per-state helper macros used by the CBC routines below.
 *
 * s(), word_in() and word_out() are not defined in this part of the file;
 * they are presumably supplied by the included project headers. Here s(v,c)
 * is used as an lvalue naming word c (0..3) of the four-word state v.
 *
 * NOTE: key_in, cbc, state_in, state_out, round, itables and l_copy expand to
 * several ';'-separated statements and are not wrapped in do { } while (0),
 * so they must only be used as full statements inside a braced block.
 */

/* ki: word c of y = word c of x XOR key word k[c]. */
#define ki(y,x,k,c) (s(y,c) = s(x, c) ^ (k)[c])
/* xo: XOR word c of x into word c of y. */
#define xo(y,x,c) (s(y,c) ^= s(x, c))
/* si: load word c of state y from buffer x via word_in(). */
#define si(y,x,c) (s(y,c) = word_in(x, c))
/* so: store word c of state x into buffer y via word_out(). */
#define so(y,x,c) word_out(y, c, s(x,c))
/*
 * locals: declarator list for two states, meant to follow a type name
 * (e.g. "aes_32t locals(b0, b1);"). With ARRAYS defined each state is a
 * 4-element array; otherwise each state is four scalars named by pasting
 * the digits 0..3 onto the state name.
 */
#if defined(ARRAYS)
#define locals(y,x) x[4],y[4]
#else
#define locals(y,x) x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
#endif
/* dtables: declare four local const aes_32t pointers tab0..tab3. */
#define dtables(tab) const aes_32t *tab##0, *tab##1, *tab##2, *tab##3
/* itables: point tab0..tab3 at tab[0]..tab[3]. */
#define itables(tab) tab##0 = tab[0]; tab##1 = tab[1]; tab##2 = tab[2]; tab##3 = tab[3]
/* l_copy: copy all four words of state x into state y (expansion already ends in ';'). */
#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
s(y,2) = s(x,2); s(y,3) = s(x,3);
/* key_in: y = x XOR k, applied to words 0..3. */
#define key_in(y,x,k) ki(y,x,k,0); ki(y,x,k,1); ki(y,x,k,2); ki(y,x,k,3)
/* cbc: y ^= x, applied to words 0..3 (the CBC chaining XOR). */
#define cbc(y,x) xo(y,x,0); xo(y,x,1); xo(y,x,2); xo(y,x,3)
/* state_in: load the four words of state y from buffer x. */
#define state_in(y,x) si(y,x,0); si(y,x,1); si(y,x,2); si(y,x,3)
/* state_out: store the four words of state x into buffer y. */
#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
/* round: apply the per-word round macro rm to words 0..3, writing y from x with key k. */
#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3)
#if defined(ENCRYPTION) && !defined(AES_ASM)
#if defined(_MSC_VER)
#pragma optimize( "s", on )
#endif
/*
 * fwd_var(x,r,c): selects word (c + r) mod 4 of state x. With constant r and
 * c the nested conditionals reduce to a single s(x,n) reference.
 */
#define fwd_var(x,r,c)\
( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
: r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\
: r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
: ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2)))
/*
 * fwd_rnd(y,x,k,c): word c of a normal forward round,
 * y[c] = k[c] ^ <table lookup on x>. The lookup helper is chosen by the
 * build options: four tables (optionally through the local t_fn0..t_fn3
 * pointers), one table, or the plain S-box followed by fwd_mcol().
 * four_cached_tables/four_tables/one_table/no_table/fwd_mcol, rf1 and upr are
 * defined outside this part of the file.
 *
 * In the table-driven variants dec_fmvars is #undef'd, which disables the
 * "#if defined( dec_fmvars )" declaration in aes_encrypt_cbc; presumably
 * those locals are only needed by fwd_mcol() -- TODO confirm in aesopt.h.
 */
#if defined(FT4_SET)
#undef dec_fmvars
# if defined(ENC_ROUND_CACHE_TABLES)
#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_cached_tables(x,t_fn,fwd_var,rf1,c))
# else
#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_fn,fwd_var,rf1,c))
# endif
#elif defined(FT1_SET)
#undef dec_fmvars
#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,upr,t_fn,fwd_var,rf1,c))
#else
#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ fwd_mcol(no_table(x,t_sbox,fwd_var,rf1,c)))
#endif
/*
 * fwd_lrnd(y,x,k,c): word c of the final forward round. Same shape as
 * fwd_rnd but uses the t_fl tables (or the bare S-box) and never applies
 * fwd_mcol().
 */
#if defined(FL4_SET)
# if defined(LAST_ENC_ROUND_CACHE_TABLES)
#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_cached_tables(x,t_fl,fwd_var,rf1,c))
# else
#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_fl,fwd_var,rf1,c))
# endif
#elif defined(FL1_SET)
#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_fl,fwd_var,rf1,c))
#else
#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ no_table(x,t_sbox,fwd_var,rf1,c))
#endif
aes_rval aes_encrypt_cbc(const unsigned char *in, const unsigned char *in_iv, unsigned int num_blk,
unsigned char *out, const aes_encrypt_ctx cx[1])
{ aes_32t locals(b0, b1);
const aes_32t *kp;
const aes_32t *kptr = cx->ks;
#if defined(ENC_ROUND_CACHE_TABLES)
dtables(t_fn);
#endif
#if defined(LAST_ENC_ROUND_CACHE_TABLES)
dtables(t_fl);
#endif
#if defined( dec_fmvars )
dec_fmvars;
#endif
#if defined( AES_ERR_CHK )
if( cx->rn != 10 && cx->rn != 12 && cx->rn != 14 )
return aes_error;
#endif
state_in(b0, in_iv);
for (;num_blk; in += AES_BLOCK_SIZE, out += AES_BLOCK_SIZE, --num_blk)
{
kp = kptr;
#if 0
state_in(b1, in);
cbc(b1, b0);
key_in(b0, b1, kp);
#else
key_in(b1, b0, kp); state_in(b0, in); cbc(b0, b1); #endif
#if defined(ENC_ROUND_CACHE_TABLES)
itables(t_fn);
#endif
#if (ENC_UNROLL == FULL)
switch(cx->rn)
{
case 14:
round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
kp += 2 * N_COLS;
case 12:
round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
kp += 2 * N_COLS;
case 10:
default:
round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
round(fwd_rnd, b1, b0, kp + 3 * N_COLS);
round(fwd_rnd, b0, b1, kp + 4 * N_COLS);
round(fwd_rnd, b1, b0, kp + 5 * N_COLS);
round(fwd_rnd, b0, b1, kp + 6 * N_COLS);
round(fwd_rnd, b1, b0, kp + 7 * N_COLS);
round(fwd_rnd, b0, b1, kp + 8 * N_COLS);
round(fwd_rnd, b1, b0, kp + 9 * N_COLS);
#if defined(LAST_ENC_ROUND_CACHE_TABLES)
itables(t_fl);
#endif
round(fwd_lrnd, b0, b1, kp +10 * N_COLS);
}
#else
{ aes_32t rnd;
#if (ENC_UNROLL == PARTIAL)
for(rnd = 0; rnd < (cx->rn >> 1) - 1; ++rnd)
{
kp += N_COLS;
round(fwd_rnd, b1, b0, kp);
kp += N_COLS;
round(fwd_rnd, b0, b1, kp);
}
kp += N_COLS;
round(fwd_rnd, b1, b0, kp);
#else
for(rnd = 0; rnd < cx->rn - 1; ++rnd)
{
kp += N_COLS;
round(fwd_rnd, b1, b0, kp);
l_copy(b0, b1);
}
#endif
#if defined(LAST_ENC_ROUND_CACHE_TABLES)
itables(t_fl);
#endif
kp += N_COLS;
round(fwd_lrnd, b0, b1, kp);
}
#endif
state_out(out, b0);
}
#if defined( AES_ERR_CHK )
return aes_good;
#endif
}
#endif
#if defined(DECRYPTION) && !defined(AES_ASM)
#if defined(_MSC_VER)
#pragma optimize( "t", on )
#endif
/*
 * inv_var(x,r,c): selects word (c - r) mod 4 of state x, i.e. the opposite
 * rotation direction to fwd_var. With constant r and c the nested
 * conditionals reduce to a single s(x,n) reference.
 */
#define inv_var(x,r,c)\
( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
: r == 1 ? ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))\
: r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
: ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0)))
/*
 * inv_rnd(y,x,k,c): word c of a normal inverse round. The table-driven
 * variants compute y[c] = k[c] ^ <table lookup on x>, using four tables
 * (optionally through the local t_in0..t_in3 pointers) or one table.
 * The fallback differs in shape: the round key is XORed with the inverse
 * S-box output first and inv_mcol() is applied to the result.
 * four_cached_tables/four_tables/one_table/no_table/inv_mcol, rf1 and upr are
 * defined outside this part of the file.
 *
 * In the table-driven variants dec_imvars is #undef'd, which disables the
 * "#if defined( dec_imvars )" declaration in aes_decrypt_cbc; presumably
 * those locals are only needed by inv_mcol() -- TODO confirm in aesopt.h.
 */
#if defined(IT4_SET)
#undef dec_imvars
# if defined(DEC_ROUND_CACHE_TABLES)
#define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_cached_tables(x,t_in,inv_var,rf1,c))
# else
#define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_in,inv_var,rf1,c))
# endif
#elif defined(IT1_SET)
#undef dec_imvars
#define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,upr,t_in,inv_var,rf1,c))
#else
#define inv_rnd(y,x,k,c) (s(y,c) = inv_mcol((k)[c] ^ no_table(x,t_ibox,inv_var,rf1,c)))
#endif
/*
 * inv_lrnd(y,x,k,c): word c of the final inverse round. Uses the t_il
 * tables (or the bare inverse S-box) and never applies inv_mcol().
 */
#if defined(IL4_SET)
# if defined(LAST_DEC_ROUND_CACHE_TABLES)
#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_cached_tables(x,t_il,inv_var,rf1,c))
# else
#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_il,inv_var,rf1,c))
# endif
#elif defined(IL1_SET)
#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_il,inv_var,rf1,c))
#else
#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ no_table(x,t_ibox,inv_var,rf1,c))
#endif
/*
 * CBC-mode decryption of num_blk consecutive AES_BLOCK_SIZE-byte blocks.
 *
 *   in      ciphertext, num_blk * AES_BLOCK_SIZE bytes
 *   in_iv   initialisation vector, one block
 *   num_blk number of blocks to process (0 is allowed: nothing is read from
 *           in or in_iv and nothing is written)
 *   out     plaintext, num_blk * AES_BLOCK_SIZE bytes
 *   cx      decryption context; cx->ks is the key schedule and cx->rn the
 *           round count
 *
 * Blocks are processed from the last to the first. For each block the
 * preceding ciphertext block (or the IV for block 0) is loaded only after
 * the current block has been decrypted and before the plaintext is stored,
 * so every ciphertext block is read before the matching output position is
 * written.
 *
 * When AES_ERR_CHK is defined the function returns aes_error if cx->rn is
 * not 10, 12 or 14 and aes_good otherwise. Without AES_ERR_CHK no value is
 * returned (aes_rval is presumably void in that configuration -- confirm in
 * the project headers).
 */
aes_rval aes_decrypt_cbc(const unsigned char *in, const unsigned char *in_iv, unsigned int num_blk,
                         unsigned char *out, const aes_decrypt_ctx cx[1])
{   aes_32t locals(b0, b1);
    const aes_32t *kptr = cx->ks + cx->rn * N_COLS;  /* round keys are consumed from the end */
    const aes_32t *kp;
#if defined(DEC_ROUND_CACHE_TABLES)
    dtables(t_in);
#endif
#if defined(LAST_DEC_ROUND_CACHE_TABLES)
    dtables(t_il);
#endif
#if defined( dec_imvars )
    dec_imvars;     /* extra locals; only present when the round macros did not #undef it */
#endif

#if defined( AES_ERR_CHK )
    if( cx->rn != 10 && cx->rn != 12 && cx->rn != 14 )
        return aes_error;
#endif

#if defined(DEC_ROUND_CACHE_TABLES)
    itables(t_in);
#endif

    /* Position on the last block. Guarded so that num_blk == 0 does not wrap
       (num_blk - 1) and read out of bounds; the offset is computed in size_t
       so it cannot wrap at 32 bits for very large buffers. */
    if (num_blk != 0)
    {
        const size_t last_off = (size_t)AES_BLOCK_SIZE * (num_blk - 1);

        in  += last_off;
        out += last_off;
        state_in(b1, in);   /* b1 = ciphertext block about to be decrypted */
    }

    for (; num_blk; --num_blk)
    {
        kp = kptr;
        key_in(b0, b1, kp);

#if (DEC_UNROLL == FULL)
        /* Longer keys run their extra rounds first and then fall through
           into the common 10-round tail; kp is moved back so the tail's
           offsets stay correct. */
        switch(cx->rn)
        {
        case 14:
            round(inv_rnd,  b1, b0, kp -  1 * N_COLS);
            round(inv_rnd,  b0, b1, kp -  2 * N_COLS);
            kp -= 2 * N_COLS;
            /* fall through */
        case 12:
            round(inv_rnd,  b1, b0, kp -  1 * N_COLS);
            round(inv_rnd,  b0, b1, kp -  2 * N_COLS);
            kp -= 2 * N_COLS;
            /* fall through */
        case 10:
        default:
            round(inv_rnd,  b1, b0, kp -  1 * N_COLS);
            round(inv_rnd,  b0, b1, kp -  2 * N_COLS);
            round(inv_rnd,  b1, b0, kp -  3 * N_COLS);
            round(inv_rnd,  b0, b1, kp -  4 * N_COLS);
            round(inv_rnd,  b1, b0, kp -  5 * N_COLS);
            round(inv_rnd,  b0, b1, kp -  6 * N_COLS);
            round(inv_rnd,  b1, b0, kp -  7 * N_COLS);
            round(inv_rnd,  b0, b1, kp -  8 * N_COLS);
            round(inv_rnd,  b1, b0, kp -  9 * N_COLS);
#if defined(LAST_DEC_ROUND_CACHE_TABLES)
            itables(t_il);
#endif
            round(inv_lrnd, b0, b1, kp - 10 * N_COLS);
        }
#else
        {   aes_32t rnd;
#if (DEC_UNROLL == PARTIAL)
            /* two rounds per iteration, ping-ponging between b0 and b1 */
            for(rnd = 0; rnd < (cx->rn >> 1) - 1; ++rnd)
            {
                kp -= N_COLS;
                round(inv_rnd, b1, b0, kp);
                kp -= N_COLS;
                round(inv_rnd, b0, b1, kp);
            }
            kp -= N_COLS;
            round(inv_rnd, b1, b0, kp);
#else
            /* one round per iteration; copy back so the input is always b0 */
            for(rnd = 0; rnd < cx->rn - 1; ++rnd)
            {
                kp -= N_COLS;
                round(inv_rnd, b1, b0, kp);
                l_copy(b0, b1);
            }
#endif
#if defined(LAST_DEC_ROUND_CACHE_TABLES)
            itables(t_il);
#endif
            kp -= N_COLS;
            round(inv_lrnd, b0, b1, kp);
        }
#endif

        /* Fetch the chaining value for this block: the IV for block 0,
           otherwise the preceding ciphertext block, which also becomes the
           next block to decrypt. */
        if (num_blk == 1)
        {
            state_in(b1, in_iv);
        }
        else
        {
            in -= AES_BLOCK_SIZE;
            state_in(b1, in);
        }

        cbc(b0, b1);
        state_out(out, b0);

        /* Step back only while blocks remain, so out never points before
           the start of the caller's buffer. */
        if (num_blk > 1)
        {
            out -= AES_BLOCK_SIZE;
        }
    }

#if defined( AES_ERR_CHK )
    return aes_good;
#endif
}
#endif
#if defined(__cplusplus)
}
#endif