#include "aesopt.h"
/* Placeholder substituted for state-variable names that are not declared at
   the configured block size (see the `#define b04 unused` style lines in the
   scalar, non-ARRAYS configurations below). */
#define unused 77

/* si: set column c of state y to the word read from byte buffer x at offset
   4*c, XORed with key word k[c].
   x and k are parenthesised so that pointer expressions (e.g. `kp + nc`) can
   be passed safely, matching the `(k)[c]` form used by the round macros.
   y and c are handed to s() untouched: s() is defined elsewhere and
   presumably token-pastes them in the scalar configuration, so they must be
   a bare name and a literal digit. */
#define si(y,x,k,c) s(y,c) = word_in((x) + 4 * (c)) ^ (k)[c]

/* so: write column c of state x to byte buffer y at offset 4*c. */
#define so(y,x,c) word_out((y) + 4 * (c), s(x,c))
#if BLOCK_SIZE == 16
/* 16-byte block: the state is four columns. */
#if !defined(ARRAYS)
/* Scalar state: declares <src>0..3 followed by <dst>0..3. */
#define locals(dst,src) \
    src##0, src##1, src##2, src##3, \
    dst##0, dst##1, dst##2, dst##3
/* Column names 4..7 do not exist at this size; map them to the placeholder. */
#define b04 unused
#define b05 unused
#define b06 unused
#define b07 unused
#define b14 unused
#define b15 unused
#define b16 unused
#define b17 unused
#else
/* Array state: two four-element arrays. */
#define locals(dst,src) src[4],dst[4]
#endif

/* Copy every column of state src into state dst (expands with a trailing ';'). */
#define l_copy(dst, src) \
    s(dst,0) = s(src,0); \
    s(dst,1) = s(src,1); \
    s(dst,2) = s(src,2); \
    s(dst,3) = s(src,3);

/* Apply si() to columns 0..3: load state st from bytes, combining with key. */
#define state_in(st,bytes,key) \
    si(st,bytes,key,0); \
    si(st,bytes,key,1); \
    si(st,bytes,key,2); \
    si(st,bytes,key,3)

/* Apply so() to columns 0..3: store state st into bytes. */
#define state_out(bytes,st) \
    so(bytes,st,0); \
    so(bytes,st,1); \
    so(bytes,st,2); \
    so(bytes,st,3)

/* Apply the per-column round macro `step` to columns 0..3. */
#define round(step,dst,src,key) \
    step(dst,src,key,0); \
    step(dst,src,key,1); \
    step(dst,src,key,2); \
    step(dst,src,key,3)
#elif BLOCK_SIZE == 20
/* 20-byte block: the state is five columns. */
#if !defined(ARRAYS)
/* Scalar state: declares <src>0..4 followed by <dst>0..4. */
#define locals(dst,src) \
    src##0, src##1, src##2, src##3, src##4, \
    dst##0, dst##1, dst##2, dst##3, dst##4
/* Column names 5..7 do not exist at this size; map them to the placeholder. */
#define b05 unused
#define b06 unused
#define b07 unused
#define b15 unused
#define b16 unused
#define b17 unused
#else
/* Array state: two five-element arrays. */
#define locals(dst,src) src[5],dst[5]
#endif

/* Copy every column of state src into state dst (expands with a trailing ';'). */
#define l_copy(dst, src) \
    s(dst,0) = s(src,0); \
    s(dst,1) = s(src,1); \
    s(dst,2) = s(src,2); \
    s(dst,3) = s(src,3); \
    s(dst,4) = s(src,4);

/* Apply si() to columns 0..4: load state st from bytes, combining with key. */
#define state_in(st,bytes,key) \
    si(st,bytes,key,0); \
    si(st,bytes,key,1); \
    si(st,bytes,key,2); \
    si(st,bytes,key,3); \
    si(st,bytes,key,4)

/* Apply so() to columns 0..4: store state st into bytes. */
#define state_out(bytes,st) \
    so(bytes,st,0); \
    so(bytes,st,1); \
    so(bytes,st,2); \
    so(bytes,st,3); \
    so(bytes,st,4)

/* Apply the per-column round macro `step` to columns 0..4. */
#define round(step,dst,src,key) \
    step(dst,src,key,0); \
    step(dst,src,key,1); \
    step(dst,src,key,2); \
    step(dst,src,key,3); \
    step(dst,src,key,4)
#elif BLOCK_SIZE == 24
/* 24-byte block: the state is six columns. */
#if !defined(ARRAYS)
/* Scalar state: declares <src>0..5 followed by <dst>0..5. */
#define locals(dst,src) \
    src##0, src##1, src##2, src##3, src##4, src##5, \
    dst##0, dst##1, dst##2, dst##3, dst##4, dst##5
/* Column names 6..7 do not exist at this size; map them to the placeholder. */
#define b06 unused
#define b07 unused
#define b16 unused
#define b17 unused
#else
/* Array state: two six-element arrays. */
#define locals(dst,src) src[6],dst[6]
#endif

/* Copy every column of state src into state dst (expands with a trailing ';'). */
#define l_copy(dst, src) \
    s(dst,0) = s(src,0); \
    s(dst,1) = s(src,1); \
    s(dst,2) = s(src,2); \
    s(dst,3) = s(src,3); \
    s(dst,4) = s(src,4); \
    s(dst,5) = s(src,5);

/* Apply si() to columns 0..5: load state st from bytes, combining with key. */
#define state_in(st,bytes,key) \
    si(st,bytes,key,0); \
    si(st,bytes,key,1); \
    si(st,bytes,key,2); \
    si(st,bytes,key,3); \
    si(st,bytes,key,4); \
    si(st,bytes,key,5)

/* Apply so() to columns 0..5: store state st into bytes. */
#define state_out(bytes,st) \
    so(bytes,st,0); \
    so(bytes,st,1); \
    so(bytes,st,2); \
    so(bytes,st,3); \
    so(bytes,st,4); \
    so(bytes,st,5)

/* Apply the per-column round macro `step` to columns 0..5. */
#define round(step,dst,src,key) \
    step(dst,src,key,0); \
    step(dst,src,key,1); \
    step(dst,src,key,2); \
    step(dst,src,key,3); \
    step(dst,src,key,4); \
    step(dst,src,key,5)
#elif BLOCK_SIZE == 28
/* 28-byte block: the state is seven columns. */
#if defined(ARRAYS)
/* Array state: two seven-element arrays. */
#define locals(y,x) x[7],y[7]
#else
/* Scalar state: declares <x>0..6 followed by <y>0..6.  The first line must
   end with a comma: both lines form a single declarator list. */
#define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5,x##6, \
y##0,y##1,y##2,y##3,y##4,y##5,y##6
/* Column name 7 does not exist at this size; map it to the placeholder. */
#define b07 unused
#define b17 unused
#endif

/* Copy every column of state x into state y (expands with a trailing ';'). */
#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
s(y,2) = s(x,2); s(y,3) = s(x,3); \
s(y,4) = s(x,4); s(y,5) = s(x,5); s(y,6) = s(x,6);

/* Apply si() to columns 0..6: load state y from bytes x, combining with k. */
#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); \
si(y,x,k,3); si(y,x,k,4); si(y,x,k,5); si(y,x,k,6)

/* Apply so() to columns 0..6: store state x into bytes y. */
#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); \
so(y,x,3); so(y,x,4); so(y,x,5); so(y,x,6)

/* Apply the per-column round macro rm to columns 0..6. */
#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); \
rm(y,x,k,3); rm(y,x,k,4); rm(y,x,k,5); rm(y,x,k,6)
#else
/* 32-byte or variable block: room for eight columns. */
#if !defined(ARRAYS)
/* Scalar state: declares <src>0..7 followed by <dst>0..7. */
#define locals(dst,src) \
    src##0, src##1, src##2, src##3, src##4, src##5, src##6, src##7, \
    dst##0, dst##1, dst##2, dst##3, dst##4, dst##5, dst##6, dst##7
#else
/* Array state: two eight-element arrays. */
#define locals(dst,src) src[8],dst[8]
#endif

/* Copy all eight columns of state src into state dst, whatever the column
   count in use (expands with a trailing ';'). */
#define l_copy(dst, src) \
    s(dst,0) = s(src,0); \
    s(dst,1) = s(src,1); \
    s(dst,2) = s(src,2); \
    s(dst,3) = s(src,3); \
    s(dst,4) = s(src,4); \
    s(dst,5) = s(src,5); \
    s(dst,6) = s(src,6); \
    s(dst,7) = s(src,7);
#if BLOCK_SIZE == 32
/* Fixed eight-column block: every macro touches columns 0..7 in order. */
#define state_in(st,bytes,key) \
    si(st,bytes,key,0); si(st,bytes,key,1); \
    si(st,bytes,key,2); si(st,bytes,key,3); \
    si(st,bytes,key,4); si(st,bytes,key,5); \
    si(st,bytes,key,6); si(st,bytes,key,7)
#define state_out(bytes,st) \
    so(bytes,st,0); so(bytes,st,1); \
    so(bytes,st,2); so(bytes,st,3); \
    so(bytes,st,4); so(bytes,st,5); \
    so(bytes,st,6); so(bytes,st,7)
#define round(step,dst,src,key) \
    step(dst,src,key,0); step(dst,src,key,1); \
    step(dst,src,key,2); step(dst,src,key,3); \
    step(dst,src,key,4); step(dst,src,key,5); \
    step(dst,src,key,6); step(dst,src,key,7)
#else
/* Variable block size: nc (defined outside this section) is switched on at
   run time.  Entering at `case nc` and falling through handles columns
   nc-1 down to 0; any nc outside 4..8 matches no label and does nothing. */
#define state_in(st,bytes,key) \
    switch(nc) \
    { \
    case 8: si(st,bytes,key,7); /* fall through */ \
    case 7: si(st,bytes,key,6); /* fall through */ \
    case 6: si(st,bytes,key,5); /* fall through */ \
    case 5: si(st,bytes,key,4); /* fall through */ \
    case 4: si(st,bytes,key,3); \
            si(st,bytes,key,2); \
            si(st,bytes,key,1); \
            si(st,bytes,key,0); \
    }
#define state_out(bytes,st) \
    switch(nc) \
    { \
    case 8: so(bytes,st,7); /* fall through */ \
    case 7: so(bytes,st,6); /* fall through */ \
    case 6: so(bytes,st,5); /* fall through */ \
    case 5: so(bytes,st,4); /* fall through */ \
    case 4: so(bytes,st,3); \
            so(bytes,st,2); \
            so(bytes,st,1); \
            so(bytes,st,0); \
    }
#ifdef FAST_VARIABLE
/* One self-contained arm per column count, each ended by break. */
#define round(step,dst,src,key) \
    switch(nc) \
    { \
    case 8: \
        step(dst,src,key,7); step(dst,src,key,6); \
        step(dst,src,key,5); step(dst,src,key,4); \
        step(dst,src,key,3); step(dst,src,key,2); \
        step(dst,src,key,1); step(dst,src,key,0); \
        break; \
    case 7: \
        step(dst,src,key,6); step(dst,src,key,5); \
        step(dst,src,key,4); step(dst,src,key,3); \
        step(dst,src,key,2); step(dst,src,key,1); \
        step(dst,src,key,0); \
        break; \
    case 6: \
        step(dst,src,key,5); step(dst,src,key,4); \
        step(dst,src,key,3); step(dst,src,key,2); \
        step(dst,src,key,1); step(dst,src,key,0); \
        break; \
    case 5: \
        step(dst,src,key,4); step(dst,src,key,3); \
        step(dst,src,key,2); step(dst,src,key,1); \
        step(dst,src,key,0); \
        break; \
    case 4: \
        step(dst,src,key,3); step(dst,src,key,2); \
        step(dst,src,key,1); step(dst,src,key,0); \
        break; \
    }
#else
/* Compact form: a single fall-through chain shared by all column counts. */
#define round(step,dst,src,key) \
    switch(nc) \
    { \
    case 8: step(dst,src,key,7); /* fall through */ \
    case 7: step(dst,src,key,6); /* fall through */ \
    case 6: step(dst,src,key,5); /* fall through */ \
    case 5: step(dst,src,key,4); /* fall through */ \
    case 4: step(dst,src,key,3); \
            step(dst,src,key,2); \
            step(dst,src,key,1); \
            step(dst,src,key,0); \
    }
#endif
#endif
#endif
#if defined(ENCRYPTION)
/*
 * fwd_var(x,r,c): select the column of state x that supplies byte row r of
 * output column c in a forward round.
 *
 * Reading the table, the selected column is (c + offset) mod nc, with
 *     r == 0 : offset 0
 *     r == 1 : offset 1
 *     r == 2 : offset 2, or 3 when nc == 8
 *     other  : offset 3, or 4 when nc > 6   (intended for r == 3)
 * These appear to be the Rijndael row-shift offsets for 4..8 columns.
 *
 * Arms for a column index c that only exists at larger nc give the answer
 * for those larger nc only (e.g. c == 7 assumes nc == 8).
 *
 * Every result is written as s(x,<literal index>) instead of being computed,
 * presumably so that s() can resolve to a named scalar when the state is not
 * an array, and so the chain can fold away when r, c and nc are constants
 * (TODO confirm against the definition of s() and the table macros).
 */
#define fwd_var(x,r,c) \
( r==0 ? \
( c==0 ? s(x,0) \
: c==1 ? s(x,1) \
: c==2 ? s(x,2) \
: c==3 ? s(x,3) \
: c==4 ? s(x,4) \
: c==5 ? s(x,5) \
: c==6 ? s(x,6) \
: s(x,7)) \
: r==1 ? \
( c==0 ? s(x,1) \
: c==1 ? s(x,2) \
: c==2 ? s(x,3) \
: c==3 ? nc==4 ? s(x,0) : s(x,4) \
: c==4 ? nc==5 ? s(x,0) : s(x,5) \
: c==5 ? nc==6 ? s(x,0) : s(x,6) \
: c==6 ? nc==7 ? s(x,0) : s(x,7) \
: s(x,0)) \
: r==2 ? \
( c==0 ? nc==8 ? s(x,3) : s(x,2) \
: c==1 ? nc==8 ? s(x,4) : s(x,3) \
: c==2 ? nc==8 ? s(x,5) : nc==4 ? s(x,0) : s(x,4) \
: c==3 ? nc==8 ? s(x,6) : nc==5 ? s(x,0) : nc==4 ? s(x,1) : s(x,5) \
: c==4 ? nc==8 ? s(x,7) : nc==7 ? s(x,6) : nc==6 ? s(x,0) : s(x,1) \
: c==5 ? nc==6 ? s(x,1) : s(x,0) \
: c==6 ? s(x,1) \
: s(x,2)) \
: \
( c==0 ? nc>6 ? s(x,4) : s(x,3) \
: c==1 ? nc>6 ? s(x,5) : nc==4 ? s(x,0) : s(x,4) \
: c==2 ? nc>6 ? s(x,6) : nc==6 ? s(x,5) : nc==5 ? s(x,0) : s(x,1) \
: c==3 ? nc==8 ? s(x,7) : nc==5 ? s(x,1) : nc==4 ? s(x,2) : s(x,0) \
: c==4 ? nc==8 ? s(x,0) : nc==5 ? s(x,2) : s(x,1) \
: c==5 ? nc==8 ? s(x,1) : s(x,2) \
: c==6 ? nc==8 ? s(x,2) : s(x,3) \
: s(x,3)))
/* With either forward round table set selected, the round macros need no
   scratch variables, so dec_fmvars is emptied. */
#if defined(FT4_SET) || defined(FT1_SET)
#undef dec_fmvars
#define dec_fmvars
#endif

/* fwd_rnd: compute column `col` of state `dst` for a normal forward round
   from state `src` and round key `key`. */
#if defined(FT4_SET)
#define fwd_rnd(dst,src,key,col) \
    s(dst,col)= (key)[col] ^ four_tables(src,ft_tab,fwd_var,rf1,col)
#elif defined(FT1_SET)
#define fwd_rnd(dst,src,key,col) \
    s(dst,col)= (key)[col] ^ one_table(src,upr,ft_tab,fwd_var,rf1,col)
#else
#define fwd_rnd(dst,src,key,col) \
    s(dst,col) = fwd_mcol(no_table(src,s_box,fwd_var,rf1,col)) ^ (key)[col]
#endif

/* fwd_lrnd: same for the last forward round; the table-free form differs
   from fwd_rnd only in omitting fwd_mcol. */
#if defined(FL4_SET)
#define fwd_lrnd(dst,src,key,col) \
    s(dst,col)= (key)[col] ^ four_tables(src,fl_tab,fwd_var,rf1,col)
#elif defined(FL1_SET)
#define fwd_lrnd(dst,src,key,col) \
    s(dst,col)= (key)[col] ^ one_table(src,ups,fl_tab,fwd_var,rf1,col)
#else
#define fwd_lrnd(dst,src,key,col) \
    s(dst,col) = no_table(src,s_box,fwd_var,rf1,col) ^ (key)[col]
#endif
/*
 * aes_enc_blk: encrypt a single block.
 *
 *   in_blk  - input bytes, read through state_in()
 *   out_blk - output bytes, written through state_out()
 *   cx      - context providing the round-key array (k_sch), the number of
 *             rounds (n_rnd) and n_blk
 *
 * Returns aes_bad when bit 0 of cx->n_blk is clear (presumably meaning no
 * encryption key has been set - confirm against the key-setup code),
 * otherwise aes_good.
 *
 * ENC_UNROLL selects one of three equivalent round schedules: FULL (straight
 * line code entered through a switch), PARTIAL (two rounds per loop pass) or
 * a plain one-round-per-pass loop.
 */
aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])
/* b0 and b1 are the two state buffers that successive rounds alternate between. */
{ uint32_t locals(b0, b1);
/* kp walks forward through the round keys, nc words per round. */
const uint32_t *kp = cx->k_sch;
/* Scratch declarations for the round macros; defined empty above when a
   forward table set is selected. */
dec_fmvars
if(!(cx->n_blk & 1)) return aes_bad;
#if (ENC_UNROLL == FULL)
/* Load into b1 for an odd round count and b0 for an even one, so that the
   b1/b0 alternation below always finishes by writing b0. */
state_in((cx->n_rnd & 1 ? b1 : b0), in_blk, kp);
/* Rebase kp so the round labelled `case 10` uses kp itself; the extra
   leading rounds of longer schedules use negative offsets. */
kp += (cx->n_rnd - 9) * nc;
/* Deliberate fall-through: enter at the label equal to n_rnd and run every
   round after it.  A round count outside 10..14 matches no label and runs
   no rounds at all. */
switch(cx->n_rnd)
{
case 14: round(fwd_rnd, b1, b0, kp - 4 * nc);
case 13: round(fwd_rnd, b0, b1, kp - 3 * nc);
case 12: round(fwd_rnd, b1, b0, kp - 2 * nc);
case 11: round(fwd_rnd, b0, b1, kp - nc);
case 10: round(fwd_rnd, b1, b0, kp );
round(fwd_rnd, b0, b1, kp + nc);
round(fwd_rnd, b1, b0, kp + 2 * nc);
round(fwd_rnd, b0, b1, kp + 3 * nc);
round(fwd_rnd, b1, b0, kp + 4 * nc);
round(fwd_rnd, b0, b1, kp + 5 * nc);
round(fwd_rnd, b1, b0, kp + 6 * nc);
round(fwd_rnd, b0, b1, kp + 7 * nc);
round(fwd_rnd, b1, b0, kp + 8 * nc);
/* Last round uses the fwd_lrnd form and leaves the result in b0. */
round(fwd_lrnd, b0, b1, kp + 9 * nc);
}
#else
{ uint32_t rnd;
state_in(b0, in_blk, kp);
#if (ENC_UNROLL == PARTIAL)
/* Two rounds per pass (b0 -> b1 -> b0), covering all but the last one or
   two rounds. */
for(rnd = 0; rnd < (cx->n_rnd - 1) >> 1; ++rnd)
{
kp += nc;
round(fwd_rnd, b1, b0, kp);
kp += nc;
round(fwd_rnd, b0, b1, kp);
}
/* Either way b1 must hold the input of the last round: an odd round count
   has already done all normal rounds, so just copy; an even count still
   owes one normal round. */
if(cx->n_rnd & 1)
{
l_copy(b1, b0);
}
else
{
kp += nc;
round(fwd_rnd, b1, b0, kp);
}
#else
/* One normal round per pass; the result is copied back so b0 is always the
   input and b1 mirrors it on exit. */
for(rnd = 0; rnd < cx->n_rnd - 1; ++rnd)
{
kp += nc;
round(fwd_rnd, b1, b0, kp);
l_copy(b0, b1);
}
#endif
/* Last round: b1 -> b0 with the final round key. */
kp += nc;
round(fwd_lrnd, b0, b1, kp);
}
#endif
state_out(out_blk, b0);
return aes_good;
}
#endif
#if defined(DECRYPTION)
/*
 * inv_var(x,r,c): select the column of state x that supplies byte row r of
 * output column c in an inverse round.
 *
 * Reading the table, the selected column is (c - offset) mod nc, with
 *     r == 0 : offset 0
 *     r == 1 : offset 1
 *     r == 2 : offset 2, or 3 when nc == 8
 *     other  : offset 3, or 4 when nc > 6   (intended for r == 3)
 * i.e. the same offsets as fwd_var applied in the opposite direction.
 *
 * Arms for a column index c that only exists at larger nc give the answer
 * for those larger nc only (e.g. c == 7 assumes nc == 8).
 *
 * Every result is written as s(x,<literal index>) instead of being computed,
 * presumably so that s() can resolve to a named scalar when the state is not
 * an array, and so the chain can fold away when r, c and nc are constants
 * (TODO confirm against the definition of s() and the table macros).
 */
#define inv_var(x,r,c) \
( r==0 ? \
( c==0 ? s(x,0) \
: c==1 ? s(x,1) \
: c==2 ? s(x,2) \
: c==3 ? s(x,3) \
: c==4 ? s(x,4) \
: c==5 ? s(x,5) \
: c==6 ? s(x,6) \
: s(x,7)) \
: r==1 ? \
( c==0 ? nc==8 ? s(x,7) : nc==7 ? s(x,6) : nc==6 ? s(x,5) : nc==5 ? s(x,4) : s(x,3) \
: c==1 ? s(x,0) \
: c==2 ? s(x,1) \
: c==3 ? s(x,2) \
: c==4 ? s(x,3) \
: c==5 ? s(x,4) \
: c==6 ? s(x,5) \
: s(x,6)) \
: r==2 ? \
( c==0 ? nc>6 ? s(x,5) : nc==6 ? s(x,4) : nc==5 ? s(x,3) : s(x,2) \
: c==1 ? nc>6 ? s(x,6) : nc==6 ? s(x,5) : nc==5 ? s(x,4) : s(x,3) \
: c==2 ? nc==8 ? s(x,7) : s(x,0) \
: c==3 ? nc==8 ? s(x,0) : s(x,1) \
: c==4 ? nc==8 ? s(x,1) : s(x,2) \
: c==5 ? nc==8 ? s(x,2) : s(x,3) \
: c==6 ? nc==8 ? s(x,3) : s(x,4) \
: s(x,4)) \
: \
( c==0 ? nc==8 ? s(x,4) : nc==5 ? s(x,2) : nc==4 ? s(x,1) : s(x,3) \
: c==1 ? nc==8 ? s(x,5) : nc==5 ? s(x,3) : nc==4 ? s(x,2) : s(x,4) \
: c==2 ? nc==8 ? s(x,6) : nc==5 ? s(x,4) : nc==4 ? s(x,3) : s(x,5) \
: c==3 ? nc==8 ? s(x,7) : nc==7 ? s(x,6) : s(x,0) \
: c==4 ? nc>6 ? s(x,0) : s(x,1) \
: c==5 ? nc==6 ? s(x,2) : s(x,1) \
: c==6 ? s(x,2) \
: s(x,3)))
/* With either inverse round table set selected, the round macros need no
   scratch variables, so dec_imvars is emptied. */
#if defined(IT4_SET) || defined(IT1_SET)
#undef dec_imvars
#define dec_imvars
#endif

/* inv_rnd: compute column `col` of state `dst` for a normal inverse round
   from state `src` and round key `key`.  Note that the table-free form XORs
   the key in before inv_mcol is applied, unlike the table forms. */
#if defined(IT4_SET)
#define inv_rnd(dst,src,key,col) \
    s(dst,col)= (key)[col] ^ four_tables(src,it_tab,inv_var,rf1,col)
#elif defined(IT1_SET)
#define inv_rnd(dst,src,key,col) \
    s(dst,col)= (key)[col] ^ one_table(src,upr,it_tab,inv_var,rf1,col)
#else
#define inv_rnd(dst,src,key,col) \
    s(dst,col) = inv_mcol(no_table(src,inv_s_box,inv_var,rf1,col) ^ (key)[col])
#endif

/* inv_lrnd: same for the last inverse round; the table-free form applies no
   inv_mcol. */
#if defined(IL4_SET)
#define inv_lrnd(dst,src,key,col) \
    s(dst,col)= (key)[col] ^ four_tables(src,il_tab,inv_var,rf1,col)
#elif defined(IL1_SET)
#define inv_lrnd(dst,src,key,col) \
    s(dst,col)= (key)[col] ^ one_table(src,ups,il_tab,inv_var,rf1,col)
#else
#define inv_lrnd(dst,src,key,col) \
    s(dst,col) = no_table(src,inv_s_box,inv_var,rf1,col) ^ (key)[col]
#endif
/*
 * aes_dec_blk: decrypt a single block.
 *
 *   in_blk  - input bytes, read through state_in()
 *   out_blk - output bytes, written through state_out()
 *   cx      - context providing the round-key array (k_sch), the number of
 *             rounds (n_rnd) and n_blk
 *
 * Returns aes_bad when bit 1 of cx->n_blk is clear (presumably meaning no
 * decryption key has been set - confirm against the key-setup code),
 * otherwise aes_good.
 *
 * Round keys are consumed from the end of k_sch towards its start.
 * DEC_UNROLL selects one of three equivalent round schedules: FULL (straight
 * line code entered through a switch), PARTIAL (two rounds per loop pass) or
 * a plain one-round-per-pass loop.
 */
aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])
/* b0 and b1 are the two state buffers that successive rounds alternate between. */
{ uint32_t locals(b0, b1);
/* Start at the last round key (index n_rnd), nc words per round. */
const uint32_t *kp = cx->k_sch + nc * cx->n_rnd;
/* Scratch declarations for the round macros; defined empty above when an
   inverse table set is selected. */
dec_imvars
if(!(cx->n_blk & 2)) return aes_bad;
#if (DEC_UNROLL == FULL)
/* Load into b1 for an odd round count and b0 for an even one, so that the
   b1/b0 alternation below always finishes by writing b0. */
state_in((cx->n_rnd & 1 ? b1 : b0), in_blk, kp);
/* Rebase kp on round key 9 so the round labelled `case 10` uses kp itself;
   the extra leading rounds of longer schedules use positive offsets. */
kp = cx->k_sch + 9 * nc;
/* Deliberate fall-through: enter at the label equal to n_rnd and run every
   round after it.  A round count outside 10..14 matches no label and runs
   no rounds at all. */
switch(cx->n_rnd)
{
case 14: round(inv_rnd, b1, b0, kp + 4 * nc);
case 13: round(inv_rnd, b0, b1, kp + 3 * nc);
case 12: round(inv_rnd, b1, b0, kp + 2 * nc);
case 11: round(inv_rnd, b0, b1, kp + nc);
case 10: round(inv_rnd, b1, b0, kp );
round(inv_rnd, b0, b1, kp - nc);
round(inv_rnd, b1, b0, kp - 2 * nc);
round(inv_rnd, b0, b1, kp - 3 * nc);
round(inv_rnd, b1, b0, kp - 4 * nc);
round(inv_rnd, b0, b1, kp - 5 * nc);
round(inv_rnd, b1, b0, kp - 6 * nc);
round(inv_rnd, b0, b1, kp - 7 * nc);
round(inv_rnd, b1, b0, kp - 8 * nc);
/* Last round uses the inv_lrnd form with round key 0 and leaves the result
   in b0. */
round(inv_lrnd, b0, b1, kp - 9 * nc);
}
#else
{ uint32_t rnd;
state_in(b0, in_blk, kp);
#if (DEC_UNROLL == PARTIAL)
/* Two rounds per pass (b0 -> b1 -> b0), covering all but the last one or
   two rounds. */
for(rnd = 0; rnd < (cx->n_rnd - 1) >> 1; ++rnd)
{
kp -= nc;
round(inv_rnd, b1, b0, kp);
kp -= nc;
round(inv_rnd, b0, b1, kp);
}
/* Either way b1 must hold the input of the last round: an odd round count
   has already done all normal rounds, so just copy; an even count still
   owes one normal round. */
if(cx->n_rnd & 1)
{
l_copy(b1, b0);
}
else
{
kp -= nc;
round(inv_rnd, b1, b0, kp);
}
#else
/* One normal round per pass; the result is copied back so b0 is always the
   input and b1 mirrors it on exit. */
for(rnd = 0; rnd < cx->n_rnd - 1; ++rnd)
{
kp -= nc;
round(inv_rnd, b1, b0, kp);
l_copy(b0, b1);
}
#endif
/* Last round: b1 -> b0 with the first round key. */
kp -= nc;
round(inv_lrnd, b0, b1, kp);
}
#endif
state_out(out_blk, b0);
return aes_good;
}
#endif