src/lib/crypto/aes.c

   1 /*
   2  * Copyright (c) 2007, Cameron Rich
   3  *
   4  * All rights reserved.
   5  *
   6  * Redistribution and use in source and binary forms, with or without
   7  * modification, are permitted provided that the following conditions are met:
   8  *
   9  * * Redistributions of source code must retain the above copyright notice,
  10  *   this list of conditions and the following disclaimer.
  11  * * Redistributions in binary form must reproduce the above copyright notice,
  12  *   this list of conditions and the following disclaimer in the documentation
  13  *   and/or other materials provided with the distribution.
  14  * * Neither the name of the axTLS project nor the names of its contributors
  15  *   may be used to endorse or promote products derived from this software
  16  *   without specific prior written permission.
  17  *
  18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  22  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  23  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  24  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  25  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  26  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  27  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  28  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29  */
  30
  31 /**
  32  * AES implementation - this is a small code version. There are much faster
  33  * versions around but they are much larger in size (i.e. they use large
  34  * submix tables).
  35  */
  36
  37 #include <string.h>
  38 #include "os_port.h"
  39 #include "crypto.h"
  40
  41 /* all commented out in skeleton mode */
  42 #ifndef CONFIG_SSL_SKELETON_MODE
  43
  44 #define rot1(x) (((x) << 24) | ((x) >> 8))
  45 #define rot2(x) (((x) << 16) | ((x) >> 16))
  46 #define rot3(x) (((x) <<  8) | ((x) >> 24))
  47
  48 /*
  49  * This cute trick does 4 'mul by two' at once.  Stolen from
  50  * Dr B. R. Gladman <brg@gladman.uk.net> but I'm sure the u-(u>>7) is
  51  * a standard graphics trick
  52  * The key to this is that we need to xor with 0x1b if the top bit is set.
  53  * a 1xxx xxxx   0xxx 0xxx First we mask the 7bit,
  54  * b 1000 0000   0000 0000 then we shift right by 7 putting the 7bit in 0bit,
  55  * c 0000 0001   0000 0000 we then subtract (c) from (b)
  56  * d 0111 1111   0000 0000 and now we and with our mask
  57  * e 0001 1011   0000 0000
  58  */
  59 #define mt  0x80808080
  60 #define ml  0x7f7f7f7f
  61 #define mh  0xfefefefe
  62 #define mm  0x1b1b1b1b
  63 #define mul2(x,t)       ((t)=((x)&mt), \
  64                         ((((x)+(x))&mh)^(((t)-((t)>>7))&mm)))
  65
  66 #define inv_mix_col(x,f2,f4,f8,f9) (\
  67                         (f2)=mul2(x,f2), \
  68                         (f4)=mul2(f2,f4), \
  69                         (f8)=mul2(f4,f8), \
  70                         (f9)=(x)^(f8), \
  71                         (f8)=((f2)^(f4)^(f8)), \
  72                         (f2)^=(f9), \
  73                         (f4)^=(f9), \
  74                         (f8)^=rot3(f2), \
  75                         (f8)^=rot2(f4), \
  76                         (f8)^rot1(f9))
  77
  78 /*
  79  * AES S-box
  80  */
  81 static const uint8_t aes_sbox[256] =
  82 {
  83         0x63,0x7C,0x77,0x7B,0xF2,0x6B,0x6F,0xC5,
  84         0x30,0x01,0x67,0x2B,0xFE,0xD7,0xAB,0x76,
  85         0xCA,0x82,0xC9,0x7D,0xFA,0x59,0x47,0xF0,
  86         0xAD,0xD4,0xA2,0xAF,0x9C,0xA4,0x72,0xC0,
  87         0xB7,0xFD,0x93,0x26,0x36,0x3F,0xF7,0xCC,
  88         0x34,0xA5,0xE5,0xF1,0x71,0xD8,0x31,0x15,
  89         0x04,0xC7,0x23,0xC3,0x18,0x96,0x05,0x9A,
  90         0x07,0x12,0x80,0xE2,0xEB,0x27,0xB2,0x75,
  91         0x09,0x83,0x2C,0x1A,0x1B,0x6E,0x5A,0xA0,
  92         0x52,0x3B,0xD6,0xB3,0x29,0xE3,0x2F,0x84,
  93         0x53,0xD1,0x00,0xED,0x20,0xFC,0xB1,0x5B,
  94         0x6A,0xCB,0xBE,0x39,0x4A,0x4C,0x58,0xCF,
  95         0xD0,0xEF,0xAA,0xFB,0x43,0x4D,0x33,0x85,
  96         0x45,0xF9,0x02,0x7F,0x50,0x3C,0x9F,0xA8,
  97         0x51,0xA3,0x40,0x8F,0x92,0x9D,0x38,0xF5,
  98         0xBC,0xB6,0xDA,0x21,0x10,0xFF,0xF3,0xD2,
  99         0xCD,0x0C,0x13,0xEC,0x5F,0x97,0x44,0x17,
 100         0xC4,0xA7,0x7E,0x3D,0x64,0x5D,0x19,0x73,
 101         0x60,0x81,0x4F,0xDC,0x22,0x2A,0x90,0x88,
 102         0x46,0xEE,0xB8,0x14,0xDE,0x5E,0x0B,0xDB,
 103         0xE0,0x32,0x3A,0x0A,0x49,0x06,0x24,0x5C,
 104         0xC2,0xD3,0xAC,0x62,0x91,0x95,0xE4,0x79,
 105         0xE7,0xC8,0x37,0x6D,0x8D,0xD5,0x4E,0xA9,
 106         0x6C,0x56,0xF4,0xEA,0x65,0x7A,0xAE,0x08,
 107         0xBA,0x78,0x25,0x2E,0x1C,0xA6,0xB4,0xC6,
 108         0xE8,0xDD,0x74,0x1F,0x4B,0xBD,0x8B,0x8A,
 109         0x70,0x3E,0xB5,0x66,0x48,0x03,0xF6,0x0E,
 110         0x61,0x35,0x57,0xB9,0x86,0xC1,0x1D,0x9E,
 111         0xE1,0xF8,0x98,0x11,0x69,0xD9,0x8E,0x94,
 112         0x9B,0x1E,0x87,0xE9,0xCE,0x55,0x28,0xDF,
 113         0x8C,0xA1,0x89,0x0D,0xBF,0xE6,0x42,0x68,
 114         0x41,0x99,0x2D,0x0F,0xB0,0x54,0xBB,0x16,
 115 };
 116
 117 /*
 118  * AES is-box
 119  */
 120 static const uint8_t aes_isbox[256] =
 121 {
 122     0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,
 123     0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb,
 124     0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,
 125     0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb,
 126     0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,
 127     0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e,
 128     0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,
 129     0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25,
 130     0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,
 131     0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92,
 132     0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,
 133     0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84,
 134     0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,
 135     0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06,
 136     0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,
 137     0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b,
 138     0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,
 139     0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73,
 140     0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,
 141     0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e,
 142     0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,
 143     0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b,
 144     0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,
 145     0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4,
 146     0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,
 147     0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f,
 148     0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,
 149     0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef,
 150     0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,
 151     0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61,
 152     0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,
 153     0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
 154 };
 155
 156 static const unsigned char Rcon[30]=
 157 {
 158         0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,
 159         0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f,
 160         0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4,
 161         0xb3,0x7d,0xfa,0xef,0xc5,0x91,
 162 };
 163
/* ----- static functions ----- */
/* Single-block primitives, defined further down in this file. Each one
   transforms the four 32-bit words at 'data' in place, reading the round
   count and key schedule from 'ctx'. */
static void AES_encrypt(const AES_CTX *ctx, uint32_t *data);
static void AES_decrypt(const AES_CTX *ctx, uint32_t *data);
 167
 168 /* Perform doubling in Galois Field GF(2^8) using the irreducible polynomial
 169    x^8+x^4+x^3+x+1 */
 170 static unsigned char AES_xtime(uint32_t x)
 171 {
 172         return (x&0x80) ? (x<<1)^0x1b : x<<1;
 173 }
 174
 175 /**
 176  * Set up AES with the key/iv and cipher size.
 177  */
 178 void AES_set_key(AES_CTX *ctx, const uint8_t *key,
 179         const uint8_t *iv, AES_MODE mode)
 180 {
 181     int i, ii;
 182     uint32_t *W, tmp, tmp2;
 183     const unsigned char *ip;
 184     int words;
 185
 186     switch (mode)
 187     {
 188         case AES_MODE_128:
 189             i = 10;
 190             words = 4;
 191             break;
 192
 193         case AES_MODE_256:
 194             i = 14;
 195             words = 8;
 196             break;
 197
 198         default:        /* fail silently */
 199             return;
 200     }
 201
 202     ctx->rounds = i;
 203     ctx->key_size = words;
 204     W = ctx->ks;
 205     for (i = 0; i < words; i+=2)
 206     {
 207         W[i+0]= ((uint32_t)key[ 0]<<24)|
 208             ((uint32_t)key[ 1]<<16)|
 209             ((uint32_t)key[ 2]<< 8)|
 210             ((uint32_t)key[ 3]    );
 211         W[i+1]= ((uint32_t)key[ 4]<<24)|
 212             ((uint32_t)key[ 5]<<16)|
 213             ((uint32_t)key[ 6]<< 8)|
 214             ((uint32_t)key[ 7]    );
 215         key += 8;
 216     }
 217
 218     ip = Rcon;
 219     ii = 4 * (ctx->rounds+1);
 220     for (i = words; i<ii; i++)
 221     {
 222         tmp = W[i-1];
 223
 224         if ((i % words) == 0)
 225         {
 226             tmp2 =(uint32_t)aes_sbox[(tmp    )&0xff]<< 8;
 227             tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<<16;
 228             tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<24;
 229             tmp2|=(uint32_t)aes_sbox[(tmp>>24)     ];
 230             tmp=tmp2^(((unsigned int)*ip)<<24);
 231             ip++;
 232         }
 233
 234         if ((words == 8) && ((i % words) == 4))
 235         {
 236             tmp2 =(uint32_t)aes_sbox[(tmp    )&0xff]    ;
 237             tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<< 8;
 238             tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<16;
 239             tmp2|=(uint32_t)aes_sbox[(tmp>>24)     ]<<24;
 240             tmp=tmp2;
 241         }
 242
 243         W[i]=W[i-words]^tmp;
 244     }
 245
 246     /* copy the iv across */
 247     memcpy(ctx->iv, iv, 16);
 248 }
 249
 250 /**
 251  * Change a key for decryption.
 252  */
 253 void AES_convert_key(AES_CTX *ctx)
 254 {
 255     int i;
 256     uint32_t *k,w,t1,t2,t3,t4;
 257
 258     k = ctx->ks;
 259     k += 4;
 260
 261     for (i= ctx->rounds*4; i > 4; i--)
 262     {
 263         w= *k;
 264         w = inv_mix_col(w,t1,t2,t3,t4);
 265         *k++ =w;
 266     }
 267 }
 268
 269 /**
 270  * Encrypt a byte sequence (with a block size 16) using the AES cipher.
 271  */
 272 void AES_cbc_encrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length)
 273 {
 274     int i;
 275     uint32_t tin[4], tout[4], iv[4];
 276
 277     memcpy(iv, ctx->iv, AES_IV_SIZE);
 278     for (i = 0; i < 4; i++)
 279         tout[i] = ntohl(iv[i]);
 280
 281     for (length -= AES_BLOCKSIZE; length >= 0; length -= AES_BLOCKSIZE)
 282     {
 283         uint32_t msg_32[4];
 284         uint32_t out_32[4];
 285         memcpy(msg_32, msg, AES_BLOCKSIZE);
 286         msg += AES_BLOCKSIZE;
 287
 288         for (i = 0; i < 4; i++)
 289             tin[i] = ntohl(msg_32[i])^tout[i];
 290
 291         AES_encrypt(ctx, tin);
 292
 293         for (i = 0; i < 4; i++)
 294         {
 295             tout[i] = tin[i];
 296             out_32[i] = htonl(tout[i]);
 297         }
 298
 299         memcpy(out, out_32, AES_BLOCKSIZE);
 300         out += AES_BLOCKSIZE;
 301     }
 302
 303     for (i = 0; i < 4; i++)
 304         iv[i] = htonl(tout[i]);
 305     memcpy(ctx->iv, iv, AES_IV_SIZE);
 306 }
 307
 308 /**
 309  * Decrypt a byte sequence (with a block size 16) using the AES cipher.
 310  */
 311 void AES_cbc_decrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length)
 312 {
 313     int i;
 314     uint32_t tin[4], xor[4], tout[4], data[4], iv[4];
 315
 316     memcpy(iv, ctx->iv, AES_IV_SIZE);
 317     for (i = 0; i < 4; i++)
 318         xor[i] = ntohl(iv[i]);
 319
 320     for (length -= 16; length >= 0; length -= 16)
 321     {
 322         uint32_t msg_32[4];
 323         uint32_t out_32[4];
 324         memcpy(msg_32, msg, AES_BLOCKSIZE);
 325         msg += AES_BLOCKSIZE;
 326
 327         for (i = 0; i < 4; i++)
 328         {
 329             tin[i] = ntohl(msg_32[i]);
 330             data[i] = tin[i];
 331         }
 332
 333         AES_decrypt(ctx, data);
 334
 335         for (i = 0; i < 4; i++)
 336         {
 337             tout[i] = data[i]^xor[i];
 338             xor[i] = tin[i];
 339             out_32[i] = htonl(tout[i]);
 340         }
 341
 342         memcpy(out, out_32, AES_BLOCKSIZE);
 343         out += AES_BLOCKSIZE;
 344     }
 345
 346     for (i = 0; i < 4; i++)
 347         iv[i] = htonl(xor[i]);
 348     memcpy(ctx->iv, iv, AES_IV_SIZE);
 349 }
 350
 351 /**
 352  * Encrypt a single block (16 bytes) of data
 353  */
 354 static void AES_encrypt(const AES_CTX *ctx, uint32_t *data)
 355 {
 356     /* To make this code smaller, generate the sbox entries on the fly.
 357      * This will have a really heavy effect upon performance.
 358      */
 359     uint32_t tmp[4];
 360     uint32_t tmp1, old_a0, a0, a1, a2, a3, row;
 361     int curr_rnd;
 362     int rounds = ctx->rounds;
 363     const uint32_t *k = ctx->ks;
 364
 365     /* Pre-round key addition */
 366     for (row = 0; row < 4; row++)
 367         data[row] ^= *(k++);
 368
 369     /* Encrypt one block. */
 370     for (curr_rnd = 0; curr_rnd < rounds; curr_rnd++)
 371     {
 372         /* Perform ByteSub and ShiftRow operations together */
 373         for (row = 0; row < 4; row++)
 374         {
 375             a0 = (uint32_t)aes_sbox[(data[row%4]>>24)&0xFF];
 376             a1 = (uint32_t)aes_sbox[(data[(row+1)%4]>>16)&0xFF];
 377             a2 = (uint32_t)aes_sbox[(data[(row+2)%4]>>8)&0xFF];
 378             a3 = (uint32_t)aes_sbox[(data[(row+3)%4])&0xFF];
 379
 380             /* Perform MixColumn iff not last round */
 381             if (curr_rnd < (rounds - 1))
 382             {
 383                 tmp1 = a0 ^ a1 ^ a2 ^ a3;
 384                 old_a0 = a0;
 385                 a0 ^= tmp1 ^ AES_xtime(a0 ^ a1);
 386                 a1 ^= tmp1 ^ AES_xtime(a1 ^ a2);
 387                 a2 ^= tmp1 ^ AES_xtime(a2 ^ a3);
 388                 a3 ^= tmp1 ^ AES_xtime(a3 ^ old_a0);
 389             }
 390
 391             tmp[row] = ((a0 << 24) | (a1 << 16) | (a2 << 8) | a3);
 392         }
 393
 394         /* KeyAddition - note that it is vital that this loop is separate from
 395            the MixColumn operation, which must be atomic...*/
 396         for (row = 0; row < 4; row++)
 397             data[row] = tmp[row] ^ *(k++);
 398     }
 399 }
 400
 401 /**
 402  * Decrypt a single block (16 bytes) of data
 403  */
 404 static void AES_decrypt(const AES_CTX *ctx, uint32_t *data)
 405 {
 406     uint32_t tmp[4];
 407     uint32_t xt0,xt1,xt2,xt3,xt4,xt5,xt6;
 408     uint32_t a0, a1, a2, a3, row;
 409     int curr_rnd;
 410     int rounds = ctx->rounds;
 411     const uint32_t *k = ctx->ks + ((rounds+1)*4);
 412
 413     /* pre-round key addition */
 414     for (row=4; row > 0;row--)
 415         data[row-1] ^= *(--k);
 416
 417     /* Decrypt one block */
 418     for (curr_rnd = 0; curr_rnd < rounds; curr_rnd++)
 419     {
 420         /* Perform ByteSub and ShiftRow operations together */
 421         for (row = 4; row > 0; row--)
 422         {
 423             a0 = aes_isbox[(data[(row+3)%4]>>24)&0xFF];
 424             a1 = aes_isbox[(data[(row+2)%4]>>16)&0xFF];
 425             a2 = aes_isbox[(data[(row+1)%4]>>8)&0xFF];
 426             a3 = aes_isbox[(data[row%4])&0xFF];
 427
 428             /* Perform MixColumn iff not last round */
 429             if (curr_rnd<(rounds-1))
 430             {
 431                 /* The MDS cofefficients (0x09, 0x0B, 0x0D, 0x0E)
 432                    are quite large compared to encryption; this
 433                    operation slows decryption down noticeably. */
 434                 xt0 = AES_xtime(a0^a1);
 435                 xt1 = AES_xtime(a1^a2);
 436                 xt2 = AES_xtime(a2^a3);
 437                 xt3 = AES_xtime(a3^a0);
 438                 xt4 = AES_xtime(xt0^xt1);
 439                 xt5 = AES_xtime(xt1^xt2);
 440                 xt6 = AES_xtime(xt4^xt5);
 441
 442                 xt0 ^= a1^a2^a3^xt4^xt6;
 443                 xt1 ^= a0^a2^a3^xt5^xt6;
 444                 xt2 ^= a0^a1^a3^xt4^xt6;
 445                 xt3 ^= a0^a1^a2^xt5^xt6;
 446                 tmp[row-1] = ((xt0<<24)|(xt1<<16)|(xt2<<8)|xt3);
 447             }
 448             else
 449                 tmp[row-1] = ((a0<<24)|(a1<<16)|(a2<<8)|a3);
 450         }
 451
 452         for (row = 4; row > 0; row--)
 453             data[row-1] = tmp[row-1] ^ *(--k);
 454     }
 455 }
 456
 457 #endif