2 * Copyright (c) 2007, Cameron Rich
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
9 * * Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 * * Neither the name of the axTLS project nor the names of its contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
22 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/**
 * AES implementation - this is a small code version. There are much faster
 * versions around, but they are much larger in size (i.e. they use large
 * lookup tables).
 */
41 /* all commented out in skeleton mode */
42 #ifndef CONFIG_SSL_SKELETON_MODE
/*
 * 32-bit rotations by whole bytes. The argument must be an unsigned 32-bit
 * value; it is evaluated twice, so it must be free of side effects.
 *   rot1: rotate right by 8 bits
 *   rot2: rotate by 16 bits
 *   rot3: rotate left by 8 bits
 */
#define rot1(x) (((x) << 24) | ((x) >> 8))
#define rot2(x) (((x) << 16) | ((x) >> 16))
#define rot3(x) (((x) << 8) | ((x) >> 24))

/*
 * This cute trick does 4 'mul by two' at once.  Stolen from
 * Dr B. R. Gladman <brg@gladman.uk.net> but I'm sure the u-(u>>7) is
 * a standard graphics trick
 * The key to this is that we need to xor with 0x1b if the top bit is set.
 * a 1xxx xxxx   0xxx 0xxx First we mask the 7bit,
 * b 1000 0000   0000 0000 then we shift right by 7 putting the 7bit in 0bit,
 * c 0000 0001   0000 0000 we then subtract (c) from (b)
 * d 0111 1111   0000 0000 and now we and with our mask
 * e 0001 1011   0000 0000
 */
#define mt  0x80808080          /* top bit of every byte */
#define mh  0xfefefefe          /* drops the bit carried in from the byte below */
#define mm  0x1b1b1b1b          /* GF(2^8) reduction constant in every byte */

/*
 * Multiply each of the four bytes packed in x by two in GF(2^8).
 * t is an lvalue used as scratch storage (it is overwritten); x is evaluated
 * several times and must be free of side effects.
 */
#define mul2(x,t)   ((t)=((x)&mt), \
            ((((x)+(x))&mh)^(((t)-((t)>>7))&mm)))

/*
 * Apply the AES InvMixColumns transform to one column packed into the
 * 32-bit word x (first byte of the column in the most significant byte).
 * f2, f4, f8 and f9 are scratch lvalues that get overwritten. The value of
 * the whole expression is the transformed column:
 *   f2 = 2x, f4 = 4x, f8 = 8x, f9 = 9x, then f8 = 0x0e*x, f2 = 0x0b*x,
 *   f4 = 0x0d*x, and the byte rotations line the products up per row.
 */
#define inv_mix_col(x,f2,f4,f8,f9) (\
            (f2)=mul2(x,f2), \
            (f4)=mul2(f2,f4), \
            (f8)=mul2(f4,f8), \
            (f9)=(x)^(f8), \
            (f8)=((f2)^(f4)^(f8)), \
            (f2)^=(f9), \
            (f4)^=(f9), \
            (f8)^=rot3(f2), \
            (f8)^=rot2(f4), \
            (f8)^rot1(f9))
/*
 * AES forward S-box: aes_sbox[b] is the SubBytes substitution of byte b.
 * Used by the key expansion and by the block encryption routine.
 */
static const uint8_t aes_sbox[256] =
{
    0x63,0x7C,0x77,0x7B,0xF2,0x6B,0x6F,0xC5,
    0x30,0x01,0x67,0x2B,0xFE,0xD7,0xAB,0x76,
    0xCA,0x82,0xC9,0x7D,0xFA,0x59,0x47,0xF0,
    0xAD,0xD4,0xA2,0xAF,0x9C,0xA4,0x72,0xC0,
    0xB7,0xFD,0x93,0x26,0x36,0x3F,0xF7,0xCC,
    0x34,0xA5,0xE5,0xF1,0x71,0xD8,0x31,0x15,
    0x04,0xC7,0x23,0xC3,0x18,0x96,0x05,0x9A,
    0x07,0x12,0x80,0xE2,0xEB,0x27,0xB2,0x75,
    0x09,0x83,0x2C,0x1A,0x1B,0x6E,0x5A,0xA0,
    0x52,0x3B,0xD6,0xB3,0x29,0xE3,0x2F,0x84,
    0x53,0xD1,0x00,0xED,0x20,0xFC,0xB1,0x5B,
    0x6A,0xCB,0xBE,0x39,0x4A,0x4C,0x58,0xCF,
    0xD0,0xEF,0xAA,0xFB,0x43,0x4D,0x33,0x85,
    0x45,0xF9,0x02,0x7F,0x50,0x3C,0x9F,0xA8,
    0x51,0xA3,0x40,0x8F,0x92,0x9D,0x38,0xF5,
    0xBC,0xB6,0xDA,0x21,0x10,0xFF,0xF3,0xD2,
    0xCD,0x0C,0x13,0xEC,0x5F,0x97,0x44,0x17,
    0xC4,0xA7,0x7E,0x3D,0x64,0x5D,0x19,0x73,
    0x60,0x81,0x4F,0xDC,0x22,0x2A,0x90,0x88,
    0x46,0xEE,0xB8,0x14,0xDE,0x5E,0x0B,0xDB,
    0xE0,0x32,0x3A,0x0A,0x49,0x06,0x24,0x5C,
    0xC2,0xD3,0xAC,0x62,0x91,0x95,0xE4,0x79,
    0xE7,0xC8,0x37,0x6D,0x8D,0xD5,0x4E,0xA9,
    0x6C,0x56,0xF4,0xEA,0x65,0x7A,0xAE,0x08,
    0xBA,0x78,0x25,0x2E,0x1C,0xA6,0xB4,0xC6,
    0xE8,0xDD,0x74,0x1F,0x4B,0xBD,0x8B,0x8A,
    0x70,0x3E,0xB5,0x66,0x48,0x03,0xF6,0x0E,
    0x61,0x35,0x57,0xB9,0x86,0xC1,0x1D,0x9E,
    0xE1,0xF8,0x98,0x11,0x69,0xD9,0x8E,0x94,
    0x9B,0x1E,0x87,0xE9,0xCE,0x55,0x28,0xDF,
    0x8C,0xA1,0x89,0x0D,0xBF,0xE6,0x42,0x68,
    0x41,0x99,0x2D,0x0F,0xB0,0x54,0xBB,0x16,
};
/*
 * AES inverse S-box: aes_isbox[b] is the InvSubBytes substitution of byte b.
 * Used by the block decryption routine.
 */
static const uint8_t aes_isbox[256] =
{
    0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,
    0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb,
    0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,
    0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb,
    0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,
    0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e,
    0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,
    0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25,
    0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,
    0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92,
    0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,
    0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84,
    0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,
    0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06,
    0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,
    0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b,
    0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,
    0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73,
    0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,
    0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e,
    0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,
    0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b,
    0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,
    0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4,
    0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,
    0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f,
    0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,
    0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef,
    0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,
    0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61,
    0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,
    0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
};
/*
 * Round constants for the key expansion. Each entry is the previous one
 * doubled in GF(2^8), starting from 0x01. AES_set_key() consumes one entry
 * each time the schedule index is a multiple of the key length in words.
 */
static const unsigned char Rcon[30]=
{
    0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,
    0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f,
    0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4,
    0xb3,0x7d,0xfa,0xef,0xc5,0x91,
};
164 /* ----- static functions ----- */
165 static void AES_encrypt(const AES_CTX
*ctx
, uint32_t *data
);
166 static void AES_decrypt(const AES_CTX
*ctx
, uint32_t *data
);
/* Perform doubling in Galois Field GF(2^8) using the irreducible polynomial
   x^8+x^4+x^3+x+1.

   Only bit 7 of x decides whether the reduction constant 0x1b is applied,
   and the result is truncated to one byte on return, so callers are
   expected to pass a value that fits in 8 bits. */
static unsigned char AES_xtime(uint32_t x)
{
    uint32_t doubled = x << 1;

    if (x & 0x80)
        doubled ^= 0x1b;        /* reduce modulo the AES polynomial */

    /* the bit shifted out of the low byte is dropped here */
    return (unsigned char)doubled;
}
176 * Set up AES with the key/iv and cipher size.
178 void AES_set_key(AES_CTX
*ctx
, const uint8_t *key
,
179 const uint8_t *iv
, AES_MODE mode
)
182 uint32_t *W
, tmp
, tmp2
;
183 const unsigned char *ip
;
198 default: /* fail silently */
203 ctx
->key_size
= words
;
205 for (i
= 0; i
< words
; i
+=2)
207 W
[i
+0]= ((uint32_t)key
[ 0]<<24)|
208 ((uint32_t)key
[ 1]<<16)|
209 ((uint32_t)key
[ 2]<< 8)|
210 ((uint32_t)key
[ 3] );
211 W
[i
+1]= ((uint32_t)key
[ 4]<<24)|
212 ((uint32_t)key
[ 5]<<16)|
213 ((uint32_t)key
[ 6]<< 8)|
214 ((uint32_t)key
[ 7] );
219 ii
= 4 * (ctx
->rounds
+1);
220 for (i
= words
; i
<ii
; i
++)
224 if ((i
% words
) == 0)
226 tmp2
=(uint32_t)aes_sbox
[(tmp
)&0xff]<< 8;
227 tmp2
|=(uint32_t)aes_sbox
[(tmp
>> 8)&0xff]<<16;
228 tmp2
|=(uint32_t)aes_sbox
[(tmp
>>16)&0xff]<<24;
229 tmp2
|=(uint32_t)aes_sbox
[(tmp
>>24) ];
230 tmp
=tmp2
^(((unsigned int)*ip
)<<24);
234 if ((words
== 8) && ((i
% words
) == 4))
236 tmp2
=(uint32_t)aes_sbox
[(tmp
)&0xff] ;
237 tmp2
|=(uint32_t)aes_sbox
[(tmp
>> 8)&0xff]<< 8;
238 tmp2
|=(uint32_t)aes_sbox
[(tmp
>>16)&0xff]<<16;
239 tmp2
|=(uint32_t)aes_sbox
[(tmp
>>24) ]<<24;
246 /* copy the iv across */
247 memcpy(ctx
->iv
, iv
, 16);
251 * Change a key for decryption.
253 void AES_convert_key(AES_CTX
*ctx
)
256 uint32_t *k
,w
,t1
,t2
,t3
,t4
;
261 for (i
= ctx
->rounds
*4; i
> 4; i
--)
264 w
= inv_mix_col(w
,t1
,t2
,t3
,t4
);
270 * Encrypt a byte sequence (with a block size 16) using the AES cipher.
272 void AES_cbc_encrypt(AES_CTX
*ctx
, const uint8_t *msg
, uint8_t *out
, int length
)
275 uint32_t tin
[4], tout
[4], iv
[4];
277 memcpy(iv
, ctx
->iv
, AES_IV_SIZE
);
278 for (i
= 0; i
< 4; i
++)
279 tout
[i
] = ntohl(iv
[i
]);
281 for (length
-= AES_BLOCKSIZE
; length
>= 0; length
-= AES_BLOCKSIZE
)
285 memcpy(msg_32
, msg
, AES_BLOCKSIZE
);
286 msg
+= AES_BLOCKSIZE
;
288 for (i
= 0; i
< 4; i
++)
289 tin
[i
] = ntohl(msg_32
[i
])^tout
[i
];
291 AES_encrypt(ctx
, tin
);
293 for (i
= 0; i
< 4; i
++)
296 out_32
[i
] = htonl(tout
[i
]);
299 memcpy(out
, out_32
, AES_BLOCKSIZE
);
300 out
+= AES_BLOCKSIZE
;
303 for (i
= 0; i
< 4; i
++)
304 iv
[i
] = htonl(tout
[i
]);
305 memcpy(ctx
->iv
, iv
, AES_IV_SIZE
);
309 * Decrypt a byte sequence (with a block size 16) using the AES cipher.
311 void AES_cbc_decrypt(AES_CTX
*ctx
, const uint8_t *msg
, uint8_t *out
, int length
)
314 uint32_t tin
[4], xor[4], tout
[4], data
[4], iv
[4];
316 memcpy(iv
, ctx
->iv
, AES_IV_SIZE
);
317 for (i
= 0; i
< 4; i
++)
318 xor[i
] = ntohl(iv
[i
]);
320 for (length
-= 16; length
>= 0; length
-= 16)
324 memcpy(msg_32
, msg
, AES_BLOCKSIZE
);
325 msg
+= AES_BLOCKSIZE
;
327 for (i
= 0; i
< 4; i
++)
329 tin
[i
] = ntohl(msg_32
[i
]);
333 AES_decrypt(ctx
, data
);
335 for (i
= 0; i
< 4; i
++)
337 tout
[i
] = data
[i
]^xor[i
];
339 out_32
[i
] = htonl(tout
[i
]);
342 memcpy(out
, out_32
, AES_BLOCKSIZE
);
343 out
+= AES_BLOCKSIZE
;
346 for (i
= 0; i
< 4; i
++)
347 iv
[i
] = htonl(xor[i
]);
348 memcpy(ctx
->iv
, iv
, AES_IV_SIZE
);
352 * Encrypt a single block (16 bytes) of data
354 static void AES_encrypt(const AES_CTX
*ctx
, uint32_t *data
)
356 /* To make this code smaller, generate the sbox entries on the fly.
357 * This will have a really heavy effect upon performance.
360 uint32_t tmp1
, old_a0
, a0
, a1
, a2
, a3
, row
;
362 int rounds
= ctx
->rounds
;
363 const uint32_t *k
= ctx
->ks
;
365 /* Pre-round key addition */
366 for (row
= 0; row
< 4; row
++)
369 /* Encrypt one block. */
370 for (curr_rnd
= 0; curr_rnd
< rounds
; curr_rnd
++)
372 /* Perform ByteSub and ShiftRow operations together */
373 for (row
= 0; row
< 4; row
++)
375 a0
= (uint32_t)aes_sbox
[(data
[row
%4]>>24)&0xFF];
376 a1
= (uint32_t)aes_sbox
[(data
[(row
+1)%4]>>16)&0xFF];
377 a2
= (uint32_t)aes_sbox
[(data
[(row
+2)%4]>>8)&0xFF];
378 a3
= (uint32_t)aes_sbox
[(data
[(row
+3)%4])&0xFF];
380 /* Perform MixColumn iff not last round */
381 if (curr_rnd
< (rounds
- 1))
383 tmp1
= a0
^ a1
^ a2
^ a3
;
385 a0
^= tmp1
^ AES_xtime(a0
^ a1
);
386 a1
^= tmp1
^ AES_xtime(a1
^ a2
);
387 a2
^= tmp1
^ AES_xtime(a2
^ a3
);
388 a3
^= tmp1
^ AES_xtime(a3
^ old_a0
);
391 tmp
[row
] = ((a0
<< 24) | (a1
<< 16) | (a2
<< 8) | a3
);
394 /* KeyAddition - note that it is vital that this loop is separate from
395 the MixColumn operation, which must be atomic...*/
396 for (row
= 0; row
< 4; row
++)
397 data
[row
] = tmp
[row
] ^ *(k
++);
402 * Decrypt a single block (16 bytes) of data
404 static void AES_decrypt(const AES_CTX
*ctx
, uint32_t *data
)
407 uint32_t xt0
,xt1
,xt2
,xt3
,xt4
,xt5
,xt6
;
408 uint32_t a0
, a1
, a2
, a3
, row
;
410 int rounds
= ctx
->rounds
;
411 const uint32_t *k
= ctx
->ks
+ ((rounds
+1)*4);
413 /* pre-round key addition */
414 for (row
=4; row
> 0;row
--)
415 data
[row
-1] ^= *(--k
);
417 /* Decrypt one block */
418 for (curr_rnd
= 0; curr_rnd
< rounds
; curr_rnd
++)
420 /* Perform ByteSub and ShiftRow operations together */
421 for (row
= 4; row
> 0; row
--)
423 a0
= aes_isbox
[(data
[(row
+3)%4]>>24)&0xFF];
424 a1
= aes_isbox
[(data
[(row
+2)%4]>>16)&0xFF];
425 a2
= aes_isbox
[(data
[(row
+1)%4]>>8)&0xFF];
426 a3
= aes_isbox
[(data
[row
%4])&0xFF];
428 /* Perform MixColumn iff not last round */
429 if (curr_rnd
<(rounds
-1))
431 /* The MDS cofefficients (0x09, 0x0B, 0x0D, 0x0E)
432 are quite large compared to encryption; this
433 operation slows decryption down noticeably. */
434 xt0
= AES_xtime(a0
^a1
);
435 xt1
= AES_xtime(a1
^a2
);
436 xt2
= AES_xtime(a2
^a3
);
437 xt3
= AES_xtime(a3
^a0
);
438 xt4
= AES_xtime(xt0
^xt1
);
439 xt5
= AES_xtime(xt1
^xt2
);
440 xt6
= AES_xtime(xt4
^xt5
);
442 xt0
^= a1
^a2
^a3
^xt4
^xt6
;
443 xt1
^= a0
^a2
^a3
^xt5
^xt6
;
444 xt2
^= a0
^a1
^a3
^xt4
^xt6
;
445 xt3
^= a0
^a1
^a2
^xt5
^xt6
;
446 tmp
[row
-1] = ((xt0
<<24)|(xt1
<<16)|(xt2
<<8)|xt3
);
449 tmp
[row
-1] = ((a0
<<24)|(a1
<<16)|(a2
<<8)|a3
);
452 for (row
= 4; row
> 0; row
--)
453 data
[row
-1] = tmp
[row
-1] ^ *(--k
);