| 1 | /* |
| 2 | * copyright (c) 2007 Michael Niedermayer <michaelni@gmx.at> |
| 3 | * |
| 4 | * some optimization ideas from aes128.c by Reimar Doeffinger |
| 5 | * |
| 6 | * This file is part of FFmpeg. |
| 7 | * |
| 8 | * FFmpeg is free software; you can redistribute it and/or |
| 9 | * modify it under the terms of the GNU Lesser General Public |
| 10 | * License as published by the Free Software Foundation; either |
| 11 | * version 2.1 of the License, or (at your option) any later version. |
| 12 | * |
| 13 | * FFmpeg is distributed in the hope that it will be useful, |
| 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 16 | * Lesser General Public License for more details. |
| 17 | * |
| 18 | * You should have received a copy of the GNU Lesser General Public |
| 19 | * License along with FFmpeg; if not, write to the Free Software |
| 20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 21 | */ |
| 22 | |
| 23 | #include "common.h" |
| 24 | #include "aes.h" |
| 25 | #include "intreadwrite.h" |
| 26 | #include "timer.h" |
| 27 | |
/**
 * One 16-byte AES block.
 *
 * All members overlay the same 16 bytes; the code in this file picks whichever
 * view is convenient for the operation at hand.
 */
typedef union {
    uint64_t u64[2];     /* two 64-bit halves, used for the round-key XOR  */
    uint32_t u32[4];     /* four 32-bit words, written by mix()            */
    uint8_t  u8x4[4][4]; /* four groups of four bytes, read by mix()       */
    uint8_t  u8[16];     /* flat byte view, used by subshift() and init    */
} av_aes_block;
| 34 | |
| 35 | typedef struct AVAES { |
| 36 | // Note: round_key[16] is accessed in the init code, but this only |
| 37 | // overwrites state, which does not matter (see also commit ba554c0). |
| 38 | av_aes_block round_key[15]; |
| 39 | av_aes_block state[2]; |
| 40 | int rounds; |
| 41 | } AVAES; |
| 42 | |
| 43 | const int av_aes_size= sizeof(AVAES); |
| 44 | |
| 45 | struct AVAES *av_aes_alloc(void) |
| 46 | { |
| 47 | return av_mallocz(sizeof(struct AVAES)); |
| 48 | } |
| 49 | |
/*
 * Round constants for the key expansion: av_aes_init() XORs one entry into
 * the first byte of the working key per expansion step.
 */
static const uint8_t rcon[10] = {
    0x01, 0x02, 0x04, 0x08, 0x10,
    0x20, 0x40, 0x80, 0x1b, 0x36,
};
| 53 | |
/*
 * Lookup tables shared by every context. They start out zeroed (static
 * storage) and are filled in by the first call to av_aes_init().
 */

/* Byte substitution table and its inverse. */
static uint8_t sbox[256];
static uint8_t inv_sbox[256];

/*
 * Combined substitution/mix tables for encryption and decryption.
 * With CONFIG_SMALL only one table per direction is stored and the other
 * three are derived with ROT() at run time (see mix_core()); otherwise all
 * four pre-rotated variants are kept.
 */
#if CONFIG_SMALL
static uint32_t enc_multbl[1][256];
static uint32_t dec_multbl[1][256];
#else
static uint32_t enc_multbl[4][256];
static uint32_t dec_multbl[4][256];
#endif
| 63 | |
/*
 * Rotate the 32-bit unsigned value x by s bits (0 < s < 32; s == 0 would
 * shift by the full width). The direction depends on the byte order:
 * right on big-endian hosts, left otherwise -- presumably so that the
 * rotation moves bytes the same way in memory on both.
 *
 * Both parameters are fully parenthesized so that expression arguments such
 * as ROT(a ^ b, n + 8) expand correctly. Each argument is still evaluated
 * twice, so do not pass expressions with side effects.
 */
#if HAVE_BIGENDIAN
#   define ROT(x, s) (((x) >> (s)) | ((x) << (32 - (s))))
#else
#   define ROT(x, s) (((x) << (s)) | ((x) >> (32 - (s))))
#endif
| 69 | |
| 70 | static inline void addkey(av_aes_block *dst, const av_aes_block *src, |
| 71 | const av_aes_block *round_key) |
| 72 | { |
| 73 | dst->u64[0] = src->u64[0] ^ round_key->u64[0]; |
| 74 | dst->u64[1] = src->u64[1] ^ round_key->u64[1]; |
| 75 | } |
| 76 | |
| 77 | static inline void addkey_s(av_aes_block *dst, const uint8_t *src, |
| 78 | const av_aes_block *round_key) |
| 79 | { |
| 80 | dst->u64[0] = AV_RN64(src) ^ round_key->u64[0]; |
| 81 | dst->u64[1] = AV_RN64(src + 8) ^ round_key->u64[1]; |
| 82 | } |
| 83 | |
| 84 | static inline void addkey_d(uint8_t *dst, const av_aes_block *src, |
| 85 | const av_aes_block *round_key) |
| 86 | { |
| 87 | AV_WN64(dst, src->u64[0] ^ round_key->u64[0]); |
| 88 | AV_WN64(dst + 8, src->u64[1] ^ round_key->u64[1]); |
| 89 | } |
| 90 | |
| 91 | static void subshift(av_aes_block s0[2], int s, const uint8_t *box) |
| 92 | { |
| 93 | av_aes_block *s1 = (av_aes_block *) (s0[0].u8 - s); |
| 94 | av_aes_block *s3 = (av_aes_block *) (s0[0].u8 + s); |
| 95 | |
| 96 | s0[0].u8[ 0] = box[s0[1].u8[ 0]]; |
| 97 | s0[0].u8[ 4] = box[s0[1].u8[ 4]]; |
| 98 | s0[0].u8[ 8] = box[s0[1].u8[ 8]]; |
| 99 | s0[0].u8[12] = box[s0[1].u8[12]]; |
| 100 | s1[0].u8[ 3] = box[s1[1].u8[ 7]]; |
| 101 | s1[0].u8[ 7] = box[s1[1].u8[11]]; |
| 102 | s1[0].u8[11] = box[s1[1].u8[15]]; |
| 103 | s1[0].u8[15] = box[s1[1].u8[ 3]]; |
| 104 | s0[0].u8[ 2] = box[s0[1].u8[10]]; |
| 105 | s0[0].u8[10] = box[s0[1].u8[ 2]]; |
| 106 | s0[0].u8[ 6] = box[s0[1].u8[14]]; |
| 107 | s0[0].u8[14] = box[s0[1].u8[ 6]]; |
| 108 | s3[0].u8[ 1] = box[s3[1].u8[13]]; |
| 109 | s3[0].u8[13] = box[s3[1].u8[ 9]]; |
| 110 | s3[0].u8[ 9] = box[s3[1].u8[ 5]]; |
| 111 | s3[0].u8[ 5] = box[s3[1].u8[ 1]]; |
| 112 | } |
| 113 | |
/**
 * Combine four table lookups into one 32-bit output word.
 *
 * With CONFIG_SMALL only multbl[0] exists and the other three contributions
 * are obtained by rotating its entries by 8, 16 and 24 bits; otherwise the
 * pre-rotated tables multbl[1..3] are used directly.
 *
 * @param multbl lookup tables (enc_multbl or dec_multbl)
 * @param a,b,c,d byte values (0..255) used as table indices
 * @return XOR of the four table entries
 *
 * The result is returned as uint32_t: the table entries are unsigned 32-bit
 * values, and passing them through a signed int would be an
 * implementation-defined conversion whenever the top bit is set.
 */
static inline uint32_t mix_core(uint32_t multbl[][256], int a, int b, int c, int d)
{
#if CONFIG_SMALL
    return multbl[0][a] ^ ROT(multbl[0][b], 8) ^ ROT(multbl[0][c], 16) ^ ROT(multbl[0][d], 24);
#else
    return multbl[0][a] ^ multbl[1][b] ^ multbl[2][c] ^ multbl[3][d];
#endif
}
| 121 | |
| 122 | static inline void mix(av_aes_block state[2], uint32_t multbl[][256], int s1, int s3){ |
| 123 | uint8_t (*src)[4] = state[1].u8x4; |
| 124 | state[0].u32[0] = mix_core(multbl, src[0][0], src[s1 ][1], src[2][2], src[s3 ][3]); |
| 125 | state[0].u32[1] = mix_core(multbl, src[1][0], src[s3-1][1], src[3][2], src[s1-1][3]); |
| 126 | state[0].u32[2] = mix_core(multbl, src[2][0], src[s3 ][1], src[0][2], src[s1 ][3]); |
| 127 | state[0].u32[3] = mix_core(multbl, src[3][0], src[s1-1][1], src[1][2], src[s3-1][3]); |
| 128 | } |
| 129 | |
| 130 | static inline void crypt(AVAES *a, int s, const uint8_t *sbox, |
| 131 | uint32_t multbl[][256]) |
| 132 | { |
| 133 | int r; |
| 134 | |
| 135 | for (r = a->rounds - 1; r > 0; r--) { |
| 136 | mix(a->state, multbl, 3 - s, 1 + s); |
| 137 | addkey(&a->state[1], &a->state[0], &a->round_key[r]); |
| 138 | } |
| 139 | |
| 140 | subshift(&a->state[0], s, sbox); |
| 141 | } |
| 142 | |
| 143 | void av_aes_crypt(AVAES *a, uint8_t *dst, const uint8_t *src, |
| 144 | int count, uint8_t *iv, int decrypt) |
| 145 | { |
| 146 | while (count--) { |
| 147 | addkey_s(&a->state[1], src, &a->round_key[a->rounds]); |
| 148 | if (decrypt) { |
| 149 | crypt(a, 0, inv_sbox, dec_multbl); |
| 150 | if (iv) { |
| 151 | addkey_s(&a->state[0], iv, &a->state[0]); |
| 152 | memcpy(iv, src, 16); |
| 153 | } |
| 154 | addkey_d(dst, &a->state[0], &a->round_key[0]); |
| 155 | } else { |
| 156 | if (iv) |
| 157 | addkey_s(&a->state[1], iv, &a->state[1]); |
| 158 | crypt(a, 2, sbox, enc_multbl); |
| 159 | addkey_d(dst, &a->state[0], &a->round_key[0]); |
| 160 | if (iv) |
| 161 | memcpy(iv, dst, 16); |
| 162 | } |
| 163 | src += 16; |
| 164 | dst += 16; |
| 165 | } |
| 166 | } |
| 167 | |
/**
 * Fill one family of combined lookup tables.
 *
 * For every byte value i whose substitution sbox[i] is non-zero, the four
 * products c[0..3] * sbox[i] are formed through the log/antilog tables and
 * packed into tbl[0][i]. AV_NE() chooses between the MKBETAG and MKTAG
 * packing (presumably by host byte order -- confirm in common.h). Unless
 * CONFIG_SMALL is set, tbl[1..3][i] receive the same word rotated by
 * 8, 16 and 24 bits.
 *
 * Entries whose substitution value is 0 are not written; they keep whatever
 * the table held before (zero for the static tables in this file).
 *
 * @param tbl   table family to fill (1 or 4 tables of 256 words)
 * @param c     the four multiplier constants, all non-zero
 * @param log8  logarithm table, indexed by a non-zero byte
 * @param alog8 antilogarithm table, indexed by the sum of two logarithms
 * @param sbox  substitution table to fold into the result
 */
static void init_multbl2(uint32_t tbl[][256], const int c[4],
                         const uint8_t *log8, const uint8_t *alog8,
                         const uint8_t *sbox)
{
    int idx;

    for (idx = 0; idx < 256; idx++) {
        int lg, p0, p1, p2, p3;

        if (!sbox[idx])
            continue; /* 0 has no logarithm */

        lg = log8[sbox[idx]];
        p0 = alog8[lg + log8[c[0]]];
        p1 = alog8[lg + log8[c[1]]];
        p2 = alog8[lg + log8[c[2]]];
        p3 = alog8[lg + log8[c[3]]];

        tbl[0][idx] = AV_NE(MKBETAG(p0, p1, p2, p3), MKTAG(p0, p1, p2, p3));
#if !CONFIG_SMALL
        tbl[1][idx] = ROT(tbl[0][idx], 8);
        tbl[2][idx] = ROT(tbl[0][idx], 16);
        tbl[3][idx] = ROT(tbl[0][idx], 24);
#endif
    }
}
| 192 | |
| 193 | // this is based on the reference AES code by Paulo Barreto and Vincent Rijmen |
| 194 | int av_aes_init(AVAES *a, const uint8_t *key, int key_bits, int decrypt) |
| 195 | { |
| 196 | int i, j, t, rconpointer = 0; |
| 197 | uint8_t tk[8][4]; |
| 198 | int KC = key_bits >> 5; |
| 199 | int rounds = KC + 6; |
| 200 | uint8_t log8[256]; |
| 201 | uint8_t alog8[512]; |
| 202 | |
| 203 | if (!enc_multbl[FF_ARRAY_ELEMS(enc_multbl)-1][FF_ARRAY_ELEMS(enc_multbl[0])-1]) { |
| 204 | j = 1; |
| 205 | for (i = 0; i < 255; i++) { |
| 206 | alog8[i] = alog8[i + 255] = j; |
| 207 | log8[j] = i; |
| 208 | j ^= j + j; |
| 209 | if (j > 255) |
| 210 | j ^= 0x11B; |
| 211 | } |
| 212 | for (i = 0; i < 256; i++) { |
| 213 | j = i ? alog8[255 - log8[i]] : 0; |
| 214 | j ^= (j << 1) ^ (j << 2) ^ (j << 3) ^ (j << 4); |
| 215 | j = (j ^ (j >> 8) ^ 99) & 255; |
| 216 | inv_sbox[j] = i; |
| 217 | sbox[i] = j; |
| 218 | } |
| 219 | init_multbl2(dec_multbl, (const int[4]) { 0xe, 0x9, 0xd, 0xb }, |
| 220 | log8, alog8, inv_sbox); |
| 221 | init_multbl2(enc_multbl, (const int[4]) { 0x2, 0x1, 0x1, 0x3 }, |
| 222 | log8, alog8, sbox); |
| 223 | } |
| 224 | |
| 225 | if (key_bits != 128 && key_bits != 192 && key_bits != 256) |
| 226 | return -1; |
| 227 | |
| 228 | a->rounds = rounds; |
| 229 | |
| 230 | memcpy(tk, key, KC * 4); |
| 231 | memcpy(a->round_key[0].u8, key, KC * 4); |
| 232 | |
| 233 | for (t = KC * 4; t < (rounds + 1) * 16; t += KC * 4) { |
| 234 | for (i = 0; i < 4; i++) |
| 235 | tk[0][i] ^= sbox[tk[KC - 1][(i + 1) & 3]]; |
| 236 | tk[0][0] ^= rcon[rconpointer++]; |
| 237 | |
| 238 | for (j = 1; j < KC; j++) { |
| 239 | if (KC != 8 || j != KC >> 1) |
| 240 | for (i = 0; i < 4; i++) |
| 241 | tk[j][i] ^= tk[j - 1][i]; |
| 242 | else |
| 243 | for (i = 0; i < 4; i++) |
| 244 | tk[j][i] ^= sbox[tk[j - 1][i]]; |
| 245 | } |
| 246 | |
| 247 | memcpy(a->round_key[0].u8 + t, tk, KC * 4); |
| 248 | } |
| 249 | |
| 250 | if (decrypt) { |
| 251 | for (i = 1; i < rounds; i++) { |
| 252 | av_aes_block tmp[3]; |
| 253 | tmp[2] = a->round_key[i]; |
| 254 | subshift(&tmp[1], 0, sbox); |
| 255 | mix(tmp, dec_multbl, 1, 3); |
| 256 | a->round_key[i] = tmp[0]; |
| 257 | } |
| 258 | } else { |
| 259 | for (i = 0; i < (rounds + 1) >> 1; i++) { |
| 260 | FFSWAP(av_aes_block, a->round_key[i], a->round_key[rounds-i]); |
| 261 | } |
| 262 | } |
| 263 | |
| 264 | return 0; |
| 265 | } |
| 266 | |
| 267 | #ifdef TEST |
| 268 | // LCOV_EXCL_START |
| 269 | #include <string.h> |
| 270 | #include "lfg.h" |
| 271 | #include "log.h" |
| 272 | |
| 273 | int main(int argc, char **argv) |
| 274 | { |
| 275 | int i, j; |
| 276 | AVAES b; |
| 277 | uint8_t rkey[2][16] = { |
| 278 | { 0 }, |
| 279 | { 0x10, 0xa5, 0x88, 0x69, 0xd7, 0x4b, 0xe5, 0xa3, |
| 280 | 0x74, 0xcf, 0x86, 0x7c, 0xfb, 0x47, 0x38, 0x59 } |
| 281 | }; |
| 282 | uint8_t pt[16], rpt[2][16]= { |
| 283 | { 0x6a, 0x84, 0x86, 0x7c, 0xd7, 0x7e, 0x12, 0xad, |
| 284 | 0x07, 0xea, 0x1b, 0xe8, 0x95, 0xc5, 0x3f, 0xa3 }, |
| 285 | { 0 } |
| 286 | }; |
| 287 | uint8_t rct[2][16]= { |
| 288 | { 0x73, 0x22, 0x81, 0xc0, 0xa0, 0xaa, 0xb8, 0xf7, |
| 289 | 0xa5, 0x4a, 0x0c, 0x67, 0xa0, 0xc4, 0x5e, 0xcf }, |
| 290 | { 0x6d, 0x25, 0x1e, 0x69, 0x44, 0xb0, 0x51, 0xe0, |
| 291 | 0x4e, 0xaa, 0x6f, 0xb4, 0xdb, 0xf7, 0x84, 0x65 } |
| 292 | }; |
| 293 | uint8_t temp[16]; |
| 294 | int err = 0; |
| 295 | |
| 296 | av_log_set_level(AV_LOG_DEBUG); |
| 297 | |
| 298 | for (i = 0; i < 2; i++) { |
| 299 | av_aes_init(&b, rkey[i], 128, 1); |
| 300 | av_aes_crypt(&b, temp, rct[i], 1, NULL, 1); |
| 301 | for (j = 0; j < 16; j++) { |
| 302 | if (rpt[i][j] != temp[j]) { |
| 303 | av_log(NULL, AV_LOG_ERROR, "%d %02X %02X\n", |
| 304 | j, rpt[i][j], temp[j]); |
| 305 | err = 1; |
| 306 | } |
| 307 | } |
| 308 | } |
| 309 | |
| 310 | if (argc > 1 && !strcmp(argv[1], "-t")) { |
| 311 | AVAES ae, ad; |
| 312 | AVLFG prng; |
| 313 | |
| 314 | av_aes_init(&ae, "PI=3.141592654..", 128, 0); |
| 315 | av_aes_init(&ad, "PI=3.141592654..", 128, 1); |
| 316 | av_lfg_init(&prng, 1); |
| 317 | |
| 318 | for (i = 0; i < 10000; i++) { |
| 319 | for (j = 0; j < 16; j++) { |
| 320 | pt[j] = av_lfg_get(&prng); |
| 321 | } |
| 322 | { |
| 323 | START_TIMER; |
| 324 | av_aes_crypt(&ae, temp, pt, 1, NULL, 0); |
| 325 | if (!(i & (i - 1))) |
| 326 | av_log(NULL, AV_LOG_ERROR, "%02X %02X %02X %02X\n", |
| 327 | temp[0], temp[5], temp[10], temp[15]); |
| 328 | av_aes_crypt(&ad, temp, temp, 1, NULL, 1); |
| 329 | STOP_TIMER("aes"); |
| 330 | } |
| 331 | for (j = 0; j < 16; j++) { |
| 332 | if (pt[j] != temp[j]) { |
| 333 | av_log(NULL, AV_LOG_ERROR, "%d %d %02X %02X\n", |
| 334 | i, j, pt[j], temp[j]); |
| 335 | } |
| 336 | } |
| 337 | } |
| 338 | } |
| 339 | return err; |
| 340 | } |
| 341 | // LCOV_EXCL_STOP |
| 342 | #endif |