[deb_shairplay.git] / src / lib / crypto / aes.c

/*
 * Copyright (c) 2007, Cameron Rich
 * 
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without 
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, 
 *   this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright notice, 
 *   this list of conditions and the following disclaimer in the documentation 
 *   and/or other materials provided with the distribution.
 * * Neither the name of the axTLS project nor the names of its contributors 
 *   may be used to endorse or promote products derived from this software 
 *   without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * AES implementation - this is a small code version. There are much faster
 * versions around but they are much larger in size (i.e. they use large 
 * submix tables).
 */

#include <string.h>
#include "os_port.h"
#include "crypto.h"

/* all commented out in skeleton mode */
#ifndef CONFIG_SSL_SKELETON_MODE

/*
 * Word rotations built from a pair of shifts whose counts add up to 32,
 * so they only behave as rotations when x is a 32-bit unsigned value:
 *   rot1 - rotate right by  8 bits
 *   rot2 - rotate by 16 bits
 *   rot3 - rotate left by 8 bits (equivalently right by 24)
 * The argument is evaluated twice; pass an expression without side effects.
 */
#define rot1(x) (((x) << 24) | ((x) >> 8))
#define rot2(x) (((x) << 16) | ((x) >> 16))
#define rot3(x) (((x) <<  8) | ((x) >> 24))

/*
 * mul2(x,t) performs the GF(2^8) 'multiply by two' on four bytes packed
 * into one word, all at once.  Stolen from
 * Dr B. R. Gladman <brg@gladman.uk.net>; the u-(u>>7) step is probably
 * a standard graphics trick.
 *
 * Doubling one byte means shifting it left by one and xoring with 0x1b
 * if its top bit was set.  Looking at a single byte lane:
 *   t = x & 0x80          isolate the top bit                       (mt)
 *   t >> 7                move that bit down to bit 0
 *   t - (t >> 7)          0x80 - 0x01 = 0x7f if the bit was set, else 0
 *   ... & 0x1b            leaves 0x1b if the bit was set, else 0    (mm)
 *   (x + x) & 0xfe        the shifted byte; the mask clears the bit that
 *                         spilled in from the lane below            (mh)
 * The last two values are xored together to form the result.
 *
 * 't' is scratch storage and is overwritten, so it must be a modifiable
 * lvalue.  'x' is evaluated more than once, so it must be free of side
 * effects.  'ml' is not referenced by any code visible in this file.
 */
#define mt  0x80808080
#define ml  0x7f7f7f7f
#define mh  0xfefefefe
#define mm  0x1b1b1b1b
#define mul2(x,t)	((t)=((x)&mt), \
			((((x)+(x))&mh)^(((t)-((t)>>7))&mm)))

/*
 * Apply the inverse MixColumn transform to one packed word x and yield the
 * transformed word as the value of the expression.
 *
 * f2, f4, f8 and f9 are scratch lvalues that are overwritten.  Writing
 * n*x for the per-byte GF(2^8) product, the steps are:
 *   f2 = 2*x, f4 = 4*x, f8 = 8*x        (three chained mul2 calls)
 *   f9 = x ^ f8                = 9*x
 *   f8 = f2 ^ f4 ^ f8          = 14*x   (0x0e)
 *   f2 ^= f9                   = 11*x   (0x0b)
 *   f4 ^= f9                   = 13*x   (0x0d)
 *   result = 14*x ^ rot3(11*x) ^ rot2(13*x) ^ rot1(9*x)
 *
 * x is read several times and must not alias any of the scratch
 * arguments or carry side effects.
 */
#define inv_mix_col(x,f2,f4,f8,f9) (\
			(f2)=mul2(x,f2), \
			(f4)=mul2(f2,f4), \
			(f8)=mul2(f4,f8), \
			(f9)=(x)^(f8), \
			(f8)=((f2)^(f4)^(f8)), \
			(f2)^=(f9), \
			(f4)^=(f9), \
			(f8)^=rot3(f2), \
			(f8)^=rot2(f4), \
			(f8)^rot1(f9))

/*
 * AES S-box: forward byte substitution table, indexed by the input byte.
 * Read by AES_set_key() during key expansion and by AES_encrypt() for the
 * ByteSub step.  Eight entries per row, so row r starts at index 8*r.
 */
static const uint8_t aes_sbox[256] =
{
	0x63,0x7C,0x77,0x7B,0xF2,0x6B,0x6F,0xC5,
	0x30,0x01,0x67,0x2B,0xFE,0xD7,0xAB,0x76,
	0xCA,0x82,0xC9,0x7D,0xFA,0x59,0x47,0xF0,
	0xAD,0xD4,0xA2,0xAF,0x9C,0xA4,0x72,0xC0,
	0xB7,0xFD,0x93,0x26,0x36,0x3F,0xF7,0xCC,
	0x34,0xA5,0xE5,0xF1,0x71,0xD8,0x31,0x15,
	0x04,0xC7,0x23,0xC3,0x18,0x96,0x05,0x9A,
	0x07,0x12,0x80,0xE2,0xEB,0x27,0xB2,0x75,
	0x09,0x83,0x2C,0x1A,0x1B,0x6E,0x5A,0xA0,
	0x52,0x3B,0xD6,0xB3,0x29,0xE3,0x2F,0x84,
	0x53,0xD1,0x00,0xED,0x20,0xFC,0xB1,0x5B,
	0x6A,0xCB,0xBE,0x39,0x4A,0x4C,0x58,0xCF,
	0xD0,0xEF,0xAA,0xFB,0x43,0x4D,0x33,0x85,
	0x45,0xF9,0x02,0x7F,0x50,0x3C,0x9F,0xA8,
	0x51,0xA3,0x40,0x8F,0x92,0x9D,0x38,0xF5,
	0xBC,0xB6,0xDA,0x21,0x10,0xFF,0xF3,0xD2,
	0xCD,0x0C,0x13,0xEC,0x5F,0x97,0x44,0x17,
	0xC4,0xA7,0x7E,0x3D,0x64,0x5D,0x19,0x73,
	0x60,0x81,0x4F,0xDC,0x22,0x2A,0x90,0x88,
	0x46,0xEE,0xB8,0x14,0xDE,0x5E,0x0B,0xDB,
	0xE0,0x32,0x3A,0x0A,0x49,0x06,0x24,0x5C,
	0xC2,0xD3,0xAC,0x62,0x91,0x95,0xE4,0x79,
	0xE7,0xC8,0x37,0x6D,0x8D,0xD5,0x4E,0xA9,
	0x6C,0x56,0xF4,0xEA,0x65,0x7A,0xAE,0x08,
	0xBA,0x78,0x25,0x2E,0x1C,0xA6,0xB4,0xC6,
	0xE8,0xDD,0x74,0x1F,0x4B,0xBD,0x8B,0x8A,
	0x70,0x3E,0xB5,0x66,0x48,0x03,0xF6,0x0E,
	0x61,0x35,0x57,0xB9,0x86,0xC1,0x1D,0x9E,
	0xE1,0xF8,0x98,0x11,0x69,0xD9,0x8E,0x94,
	0x9B,0x1E,0x87,0xE9,0xCE,0x55,0x28,0xDF,
	0x8C,0xA1,0x89,0x0D,0xBF,0xE6,0x42,0x68,
	0x41,0x99,0x2D,0x0F,0xB0,0x54,0xBB,0x16,
};

/*
 * AES inverse S-box: byte substitution table that undoes aes_sbox
 * (for example aes_sbox[0x00] is 0x63 and aes_isbox[0x63] is 0x00).
 * Read by AES_decrypt().  Eight entries per row, so row r starts at
 * index 8*r.
 */
static const uint8_t aes_isbox[256] = 
{
    0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,
    0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb,
    0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,
    0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb,
    0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,
    0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e,
    0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,
    0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25,
    0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,
    0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92,
    0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,
    0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84,
    0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,
    0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06,
    0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,
    0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b,
    0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,
    0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73,
    0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,
    0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e,
    0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,
    0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b,
    0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,
    0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4,
    0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,
    0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f,
    0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,
    0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef,
    0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,
    0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61,
    0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,
    0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
};

/*
 * Round constants for key expansion.  AES_set_key() walks this table from
 * the start, consuming one entry each time the word index is a multiple
 * of the key length in words, and xors it into the top byte of the word.
 * Each entry is the GF(2^8) double of the previous one (0x80 -> 0x1b).
 */
static const unsigned char Rcon[30]=
{
	0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,
	0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f,
	0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4,
	0xb3,0x7d,0xfa,0xef,0xc5,0x91,
};

/* ----- static functions ----- */
/* Forward declarations of the single-block primitives; they are defined
 * after the CBC routines that call them.  Both transform the four words
 * pointed to by data in place. */
static void AES_encrypt(const AES_CTX *ctx, uint32_t *data);
static void AES_decrypt(const AES_CTX *ctx, uint32_t *data);

/* Double a value in the Galois Field GF(2^8) defined by the irreducible
   polynomial x^8+x^4+x^3+x+1: shift left by one and, when bit 7 of the
   input was set, fold the overflow back in by xoring with 0x1b.  Only the
   low eight bits of the result are returned. */
static unsigned char AES_xtime(uint32_t x)
{
	uint32_t doubled = x << 1;

	if (x & 0x80)
		doubled ^= 0x1b;

	return (unsigned char)doubled;
}

/**
 * Initialise an AES context: expand the cipher key into the round-key
 * schedule and store the initialisation vector.
 *
 * @param ctx   Context to fill in (rounds, key_size, ks and iv are written).
 * @param key   Raw key bytes; 16 bytes are read for AES_MODE_128 and
 *              32 bytes for AES_MODE_256.
 * @param iv    Initialisation vector; 16 bytes are copied into ctx->iv.
 * @param mode  AES_MODE_128 or AES_MODE_256.  Any other value makes the
 *              function return immediately, leaving ctx untouched.
 */
void AES_set_key(AES_CTX *ctx, const uint8_t *key, 
        const uint8_t *iv, AES_MODE mode)
{
    uint32_t *sched;
    const unsigned char *rc = Rcon;
    int nk;         /* key length in 32-bit words */
    int nr;         /* number of cipher rounds */
    int total;      /* number of schedule words to produce */
    int idx;

    if (mode == AES_MODE_128)
    {
        nr = 10;
        nk = 4;
    }
    else if (mode == AES_MODE_256)
    {
        nr = 14;
        nk = 8;
    }
    else
    {
        return;     /* fail silently */
    }

    ctx->rounds = nr;
    ctx->key_size = nk;
    sched = ctx->ks;

    /* The first nk schedule words are the key itself, loaded big-endian. */
    for (idx = 0; idx < nk; idx++)
    {
        sched[idx] = ((uint32_t)key[0] << 24) |
                     ((uint32_t)key[1] << 16) |
                     ((uint32_t)key[2] <<  8) |
                     ((uint32_t)key[3]      );
        key += 4;
    }

    /* Derive every remaining word from the previous word and the word
     * nk positions back. */
    total = 4 * (nr + 1);
    for (idx = nk; idx < total; idx++)
    {
        uint32_t t = sched[idx - 1];
        const int pos = idx % nk;

        if (pos == 0)
        {
            /* Rotate the word left by one byte, substitute every byte
             * through the S-box, then xor the next round constant into
             * the top byte. */
            uint32_t s = ((uint32_t)aes_sbox[(t >> 16) & 0xff] << 24) |
                         ((uint32_t)aes_sbox[(t >>  8) & 0xff] << 16) |
                         ((uint32_t)aes_sbox[(t      ) & 0xff] <<  8) |
                         ((uint32_t)aes_sbox[(t >> 24)       ]      );

            t = s ^ (((unsigned int)*rc) << 24);
            rc++;
        }
        else if ((nk == 8) && (pos == 4))
        {
            /* 256-bit keys get an extra substitution (no rotation)
             * half-way through each group of eight words. */
            t = ((uint32_t)aes_sbox[(t >> 24)       ] << 24) |
                ((uint32_t)aes_sbox[(t >> 16) & 0xff] << 16) |
                ((uint32_t)aes_sbox[(t >>  8) & 0xff] <<  8) |
                ((uint32_t)aes_sbox[(t      ) & 0xff]      );
        }

        sched[idx] = sched[idx - nk] ^ t;
    }

    /* copy the iv across */
    memcpy(ctx->iv, iv, 16);
}

/**
 * Convert an expanded encryption key schedule into the form used for
 * decryption.
 *
 * Every schedule word from index 4 up to (but not including) rounds*4 is
 * replaced by its inverse-MixColumn transform.  The first four words and
 * the last four words of the schedule are left as they are.
 */
void AES_convert_key(AES_CTX *ctx)
{
    uint32_t *sched = ctx->ks;
    uint32_t f2, f4, f8, f9;    /* scratch for inv_mix_col */
    int limit = ctx->rounds * 4;
    int idx;

    for (idx = 4; idx < limit; idx++)
    {
        uint32_t word = sched[idx];

        sched[idx] = inv_mix_col(word, f2, f4, f8, f9);
    }
}

/**
 * CBC-encrypt a byte sequence with the key schedule and IV held in ctx.
 *
 * Input is consumed one AES_BLOCKSIZE block at a time; any trailing bytes
 * that do not fill a whole block are ignored.  Each plaintext block is
 * xored with the previous ciphertext block (the IV for the first one)
 * before being encrypted.  On return ctx->iv holds the last ciphertext
 * block produced, so a following call continues the chain.
 *
 * @param ctx     Context prepared by AES_set_key().
 * @param msg     Plaintext input.
 * @param out     Ciphertext output; receives one block per input block.
 * @param length  Number of input bytes.
 */
void AES_cbc_encrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length)
{
    uint32_t chain[4];      /* previous ciphertext block, host byte order */
    uint32_t block[4];      /* block being encrypted, host byte order */
    uint32_t iv_words[4];   /* staging area for ctx->iv */
    int remaining;
    int w;

    /* Start the chain from the stored IV. */
    memcpy(iv_words, ctx->iv, AES_IV_SIZE);
    for (w = 0; w < 4; w++)
        chain[w] = ntohl(iv_words[w]);

    remaining = length - AES_BLOCKSIZE;
    while (remaining >= 0)
    {
        uint32_t in_words[4];
        uint32_t out_words[4];

        /* Go through aligned temporaries so msg/out may be unaligned. */
        memcpy(in_words, msg, AES_BLOCKSIZE);
        msg += AES_BLOCKSIZE;

        for (w = 0; w < 4; w++)
            block[w] = ntohl(in_words[w]) ^ chain[w];

        AES_encrypt(ctx, block);

        for (w = 0; w < 4; w++)
        {
            chain[w] = block[w];
            out_words[w] = htonl(chain[w]);
        }

        memcpy(out, out_words, AES_BLOCKSIZE);
        out += AES_BLOCKSIZE;

        remaining -= AES_BLOCKSIZE;
    }

    /* Remember the final ciphertext block as the next IV. */
    for (w = 0; w < 4; w++)
        iv_words[w] = htonl(chain[w]);
    memcpy(ctx->iv, iv_words, AES_IV_SIZE);
}

/**
 * CBC-decrypt a byte sequence with the key schedule and IV held in ctx.
 *
 * Input is consumed one AES_BLOCKSIZE block at a time; any trailing bytes
 * that do not fill a whole block are ignored.  Each block is decrypted
 * and then xored with the previous ciphertext block (the IV for the first
 * one).  On return ctx->iv holds the last ciphertext block consumed, so a
 * following call continues the chain.
 *
 * The ciphertext block is copied aside before the output is written, so
 * msg and out may refer to the same buffer.
 *
 * NOTE(review): AES_decrypt() applies the inverse MixColumn step before
 * the round-key addition, which suggests the schedule is expected to have
 * been passed through AES_convert_key() first - confirm against callers.
 *
 * @param ctx     Context prepared by AES_set_key().
 * @param msg     Ciphertext input.
 * @param out     Plaintext output; receives one block per input block.
 * @param length  Number of input bytes.
 */
void AES_cbc_decrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length)
{
    int i;
    /* 'chain' holds the previous ciphertext block.  It is deliberately
     * not called 'xor': that name is a macro for '^' once <iso646.h> is
     * included and an operator token in C++. */
    uint32_t tin[4], chain[4], tout[4], data[4], iv[4];

    /* Start the chain from the stored IV. */
    memcpy(iv, ctx->iv, AES_IV_SIZE);
    for (i = 0; i < 4; i++)
        chain[i] = ntohl(iv[i]);

    for (length -= AES_BLOCKSIZE; length >= 0; length -= AES_BLOCKSIZE)
    {
        uint32_t msg_32[4];
        uint32_t out_32[4];

        /* Go through aligned temporaries so msg/out may be unaligned. */
        memcpy(msg_32, msg, AES_BLOCKSIZE);
        msg += AES_BLOCKSIZE;

        for (i = 0; i < 4; i++)
        {
            tin[i] = ntohl(msg_32[i]);
            data[i] = tin[i];
        }

        AES_decrypt(ctx, data);

        for (i = 0; i < 4; i++)
        {
            tout[i] = data[i]^chain[i];
            chain[i] = tin[i];      /* this ciphertext feeds the next block */
            out_32[i] = htonl(tout[i]);
        }

        memcpy(out, out_32, AES_BLOCKSIZE);
        out += AES_BLOCKSIZE;
    }

    /* Remember the final ciphertext block as the next IV. */
    for (i = 0; i < 4; i++)
        iv[i] = htonl(chain[i]);
    memcpy(ctx->iv, iv, AES_IV_SIZE);
}

/**
 * Encrypt a single block (16 bytes) in place.
 *
 * data points at four 32-bit words.  Within a word the byte used as the
 * first row sits in bits 31..24 and the byte used as the last row in
 * bits 7..0.  Round keys are read sequentially from the start of ctx->ks.
 *
 * To keep the code small the round transformation is computed on the fly
 * from the S-box instead of from large precomputed tables, which costs
 * a lot of performance.
 */
static void AES_encrypt(const AES_CTX *ctx, uint32_t *data)
{
    const uint32_t *round_key = ctx->ks;
    const int total_rounds = ctx->rounds;
    uint32_t next[4];
    int rnd, col;

    /* Pre-round key addition */
    for (col = 0; col < 4; col++)
        data[col] ^= round_key[col];
    round_key += 4;

    for (rnd = 0; rnd < total_rounds; rnd++)
    {
        /* MixColumn is skipped in the final round */
        const int mix = (rnd < (total_rounds - 1));

        for (col = 0; col < 4; col++)
        {
            /* ByteSub and ShiftRow together: row r of the output word is
             * taken from input word (col + r) mod 4. */
            uint32_t b0 = aes_sbox[(data[col] >> 24) & 0xFF];
            uint32_t b1 = aes_sbox[(data[(col + 1) & 3] >> 16) & 0xFF];
            uint32_t b2 = aes_sbox[(data[(col + 2) & 3] >> 8) & 0xFF];
            uint32_t b3 = aes_sbox[data[(col + 3) & 3] & 0xFF];

            if (mix)
            {
                const uint32_t all = b0 ^ b1 ^ b2 ^ b3;
                const uint32_t m0 = b0 ^ all ^ AES_xtime(b0 ^ b1);
                const uint32_t m1 = b1 ^ all ^ AES_xtime(b1 ^ b2);
                const uint32_t m2 = b2 ^ all ^ AES_xtime(b2 ^ b3);
                const uint32_t m3 = b3 ^ all ^ AES_xtime(b3 ^ b0);

                b0 = m0;
                b1 = m1;
                b2 = m2;
                b3 = m3;
            }

            next[col] = (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
        }

        /* KeyAddition - kept as a separate pass because every output word
           must be computed from the old state before any of it is
           overwritten. */
        for (col = 0; col < 4; col++)
            data[col] = next[col] ^ round_key[col];
        round_key += 4;
    }
}

/**
 * Decrypt a single block (16 bytes) in place.
 *
 * data points at four 32-bit words laid out as for AES_encrypt().  Round
 * keys are consumed backwards, four words at a time, starting from the
 * end of the schedule at ctx->ks + (rounds + 1) * 4.
 */
static void AES_decrypt(const AES_CTX *ctx, uint32_t *data)
{ 
    const int total_rounds = ctx->rounds;
    const uint32_t *round_key = ctx->ks + ((total_rounds + 1) * 4);
    uint32_t next[4];
    int rnd, col;

    /* pre-round key addition, using the last four schedule words */
    round_key -= 4;
    for (col = 0; col < 4; col++)
        data[col] ^= round_key[col];

    for (rnd = 0; rnd < total_rounds; rnd++)
    {
        /* MixColumn is skipped in the final round */
        const int mix = (rnd < (total_rounds - 1));

        for (col = 0; col < 4; col++)
        {
            /* Inverse ByteSub and ShiftRow together: row r of the output
             * word is taken from input word (col - r) mod 4. */
            uint32_t b0 = aes_isbox[(data[col] >> 24) & 0xFF];
            uint32_t b1 = aes_isbox[(data[(col + 3) & 3] >> 16) & 0xFF];
            uint32_t b2 = aes_isbox[(data[(col + 2) & 3] >> 8) & 0xFF];
            uint32_t b3 = aes_isbox[data[(col + 1) & 3] & 0xFF];

            if (mix)
            {
                /* The MDS coefficients (0x09, 0x0B, 0x0D, 0x0E) are
                   quite large compared to encryption, so many doublings
                   are needed; this slows decryption down noticeably. */
                const uint32_t d01 = AES_xtime(b0 ^ b1);    /* 2*(b0^b1) */
                const uint32_t d12 = AES_xtime(b1 ^ b2);    /* 2*(b1^b2) */
                const uint32_t d23 = AES_xtime(b2 ^ b3);    /* 2*(b2^b3) */
                const uint32_t d30 = AES_xtime(b3 ^ b0);    /* 2*(b3^b0) */
                const uint32_t q02 = AES_xtime(d01 ^ d12);  /* 4*(b0^b2) */
                const uint32_t q13 = AES_xtime(d12 ^ d23);  /* 4*(b1^b3) */
                const uint32_t e_all = AES_xtime(q02 ^ q13); /* 8*(all four) */

                const uint32_t r0 = d01 ^ b1 ^ b2 ^ b3 ^ q02 ^ e_all;
                const uint32_t r1 = d12 ^ b0 ^ b2 ^ b3 ^ q13 ^ e_all;
                const uint32_t r2 = d23 ^ b0 ^ b1 ^ b3 ^ q02 ^ e_all;
                const uint32_t r3 = d30 ^ b0 ^ b1 ^ b2 ^ q13 ^ e_all;

                b0 = r0;
                b1 = r1;
                b2 = r2;
                b3 = r3;
            }

            next[col] = (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
        }

        /* Key addition with the preceding group of four schedule words. */
        round_key -= 4;
        for (col = 0; col < 4; col++)
            data[col] = next[col] ^ round_key[col];
    }
}

#endif
Commit	Line	Data
15c988f7 JB	1	/*
	2	* Copyright (c) 2007, Cameron Rich
	3	*
	4	* All rights reserved.
	5	*
	6	* Redistribution and use in source and binary forms, with or without
	7	* modification, are permitted provided that the following conditions are met:
	8	*
	9	* * Redistributions of source code must retain the above copyright notice,
	10	* this list of conditions and the following disclaimer.
	11	* * Redistributions in binary form must reproduce the above copyright notice,
	12	* this list of conditions and the following disclaimer in the documentation
	13	* and/or other materials provided with the distribution.
	14	* * Neither the name of the axTLS project nor the names of its contributors
	15	* may be used to endorse or promote products derived from this software
	16	* without specific prior written permission.
	17	*
	18	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	19	* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	20	* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	21	* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
	22	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
	23	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
	24	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
	25	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
	26	* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
	27	* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	28	* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	29	*/
	30
	31	/**
	32	* AES implementation - this is a small code version. There are much faster
	33	* versions around but they are much larger in size (i.e. they use large
	34	* submix tables).
	35	*/
	36
	37	#include <string.h>
	38	#include "os_port.h"
	39	#include "crypto.h"
	40
	41	/* all commented out in skeleton mode */
	42	#ifndef CONFIG_SSL_SKELETON_MODE
	43
	44	#define rot1(x) (((x) << 24) \| ((x) >> 8))
	45	#define rot2(x) (((x) << 16) \| ((x) >> 16))
	46	#define rot3(x) (((x) << 8) \| ((x) >> 24))
	47
	48	/*
	49	* This cute trick does 4 'mul by two' at once. Stolen from
	50	* Dr B. R. Gladman <brg@gladman.uk.net> but I'm sure the u-(u>>7) is
	51	* a standard graphics trick
	52	* The key to this is that we need to xor with 0x1b if the top bit is set.
	53	* a 1xxx xxxx 0xxx 0xxx First we mask the 7bit,
	54	* b 1000 0000 0000 0000 then we shift right by 7 putting the 7bit in 0bit,
	55	* c 0000 0001 0000 0000 we then subtract (c) from (b)
	56	* d 0111 1111 0000 0000 and now we and with our mask
	57	* e 0001 1011 0000 0000
	58	*/
	59	#define mt 0x80808080
	60	#define ml 0x7f7f7f7f
	61	#define mh 0xfefefefe
	62	#define mm 0x1b1b1b1b
	63	#define mul2(x,t) ((t)=((x)&mt), \
	64	((((x)+(x))&mh)^(((t)-((t)>>7))&mm)))
65
66	#define inv_mix_col(x,f2,f4,f8,f9) (\
67	(f2)=mul2(x,f2), \
68	(f4)=mul2(f2,f4), \
69	(f8)=mul2(f4,f8), \
70	(f9)=(x)^(f8), \
71	(f8)=((f2)^(f4)^(f8)), \
72	(f2)^=(f9), \
73	(f4)^=(f9), \
74	(f8)^=rot3(f2), \
75	(f8)^=rot2(f4), \
76	(f8)^rot1(f9))
77
78	/*
79	* AES S-box
80	*/
81	static const uint8_t aes_sbox[256] =
82	{
83	0x63,0x7C,0x77,0x7B,0xF2,0x6B,0x6F,0xC5,
84	0x30,0x01,0x67,0x2B,0xFE,0xD7,0xAB,0x76,
85	0xCA,0x82,0xC9,0x7D,0xFA,0x59,0x47,0xF0,
86	0xAD,0xD4,0xA2,0xAF,0x9C,0xA4,0x72,0xC0,
87	0xB7,0xFD,0x93,0x26,0x36,0x3F,0xF7,0xCC,
88	0x34,0xA5,0xE5,0xF1,0x71,0xD8,0x31,0x15,
89	0x04,0xC7,0x23,0xC3,0x18,0x96,0x05,0x9A,
90	0x07,0x12,0x80,0xE2,0xEB,0x27,0xB2,0x75,
91	0x09,0x83,0x2C,0x1A,0x1B,0x6E,0x5A,0xA0,
92	0x52,0x3B,0xD6,0xB3,0x29,0xE3,0x2F,0x84,
93	0x53,0xD1,0x00,0xED,0x20,0xFC,0xB1,0x5B,
94	0x6A,0xCB,0xBE,0x39,0x4A,0x4C,0x58,0xCF,
95	0xD0,0xEF,0xAA,0xFB,0x43,0x4D,0x33,0x85,
96	0x45,0xF9,0x02,0x7F,0x50,0x3C,0x9F,0xA8,
97	0x51,0xA3,0x40,0x8F,0x92,0x9D,0x38,0xF5,
98	0xBC,0xB6,0xDA,0x21,0x10,0xFF,0xF3,0xD2,
99	0xCD,0x0C,0x13,0xEC,0x5F,0x97,0x44,0x17,
100	0xC4,0xA7,0x7E,0x3D,0x64,0x5D,0x19,0x73,
101	0x60,0x81,0x4F,0xDC,0x22,0x2A,0x90,0x88,
102	0x46,0xEE,0xB8,0x14,0xDE,0x5E,0x0B,0xDB,
103	0xE0,0x32,0x3A,0x0A,0x49,0x06,0x24,0x5C,
104	0xC2,0xD3,0xAC,0x62,0x91,0x95,0xE4,0x79,
105	0xE7,0xC8,0x37,0x6D,0x8D,0xD5,0x4E,0xA9,
106	0x6C,0x56,0xF4,0xEA,0x65,0x7A,0xAE,0x08,
107	0xBA,0x78,0x25,0x2E,0x1C,0xA6,0xB4,0xC6,
108	0xE8,0xDD,0x74,0x1F,0x4B,0xBD,0x8B,0x8A,
109	0x70,0x3E,0xB5,0x66,0x48,0x03,0xF6,0x0E,
110	0x61,0x35,0x57,0xB9,0x86,0xC1,0x1D,0x9E,
111	0xE1,0xF8,0x98,0x11,0x69,0xD9,0x8E,0x94,
112	0x9B,0x1E,0x87,0xE9,0xCE,0x55,0x28,0xDF,
113	0x8C,0xA1,0x89,0x0D,0xBF,0xE6,0x42,0x68,
114	0x41,0x99,0x2D,0x0F,0xB0,0x54,0xBB,0x16,
115	};
116
117	/*
118	* AES is-box
119	*/
120	static const uint8_t aes_isbox[256] =
121	{
122	0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,
123	0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb,
124	0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,
125	0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb,
126	0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,
127	0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e,
128	0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,
129	0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25,
130	0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,
131	0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92,
132	0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,
133	0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84,
134	0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,
135	0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06,
136	0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,
137	0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b,
138	0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,
139	0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73,
140	0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,
141	0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e,
142	0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,
143	0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b,
144	0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,
145	0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4,
146	0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,
147	0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f,
148	0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,
149	0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef,
150	0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,
151	0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61,
152	0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,
153	0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
154	};
155
156	static const unsigned char Rcon[30]=
157	{
158	0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,
159	0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f,
160	0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4,
161	0xb3,0x7d,0xfa,0xef,0xc5,0x91,
162	};
163
164	/* ----- static functions ----- */
165	static void AES_encrypt(const AES_CTX ctx, uint32_t data);
166	static void AES_decrypt(const AES_CTX ctx, uint32_t data);
167
168	/* Perform doubling in Galois Field GF(2^8) using the irreducible polynomial
169	x^8+x^4+x^3+x+1 */
170	static unsigned char AES_xtime(uint32_t x)
171	{
172	return (x&0x80) ? (x<<1)^0x1b : x<<1;
173	}
174
175	/**
176	* Set up AES with the key/iv and cipher size.
177	*/
178	void AES_set_key(AES_CTX ctx, const uint8_t key,
179	const uint8_t *iv, AES_MODE mode)
180	{
181	int i, ii;
182	uint32_t *W, tmp, tmp2;
183	const unsigned char *ip;
184	int words;
185
186	switch (mode)
187	{
188	case AES_MODE_128:
189	i = 10;
190	words = 4;
191	break;
192
193	case AES_MODE_256:
194	i = 14;
195	words = 8;
196	break;
197
198	default: /* fail silently */
199	return;
200	}
201
202	ctx->rounds = i;
203	ctx->key_size = words;
204	W = ctx->ks;
205	for (i = 0; i < words; i+=2)
206	{
207	W[i+0]= ((uint32_t)key[ 0]<<24)\|
208	((uint32_t)key[ 1]<<16)\|
209	((uint32_t)key[ 2]<< 8)\|
210	((uint32_t)key[ 3] );
211	W[i+1]= ((uint32_t)key[ 4]<<24)\|
212	((uint32_t)key[ 5]<<16)\|
213	((uint32_t)key[ 6]<< 8)\|
214	((uint32_t)key[ 7] );
215	key += 8;
216	}
217
218	ip = Rcon;
219	ii = 4 * (ctx->rounds+1);
220	for (i = words; i<ii; i++)
221	{
222	tmp = W[i-1];
223
224	if ((i % words) == 0)
225	{
226	tmp2 =(uint32_t)aes_sbox[(tmp )&0xff]<< 8;
227	tmp2\|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<<16;
228	tmp2\|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<24;
229	tmp2\|=(uint32_t)aes_sbox[(tmp>>24) ];
230	tmp=tmp2^(((unsigned int)*ip)<<24);
231	ip++;
232	}
233
234	if ((words == 8) && ((i % words) == 4))
235	{
236	tmp2 =(uint32_t)aes_sbox[(tmp )&0xff] ;
237	tmp2\|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<< 8;
238	tmp2\|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<16;
239	tmp2\|=(uint32_t)aes_sbox[(tmp>>24) ]<<24;
240	tmp=tmp2;
241	}
242
243	W[i]=W[i-words]^tmp;
244	}
245
246	/* copy the iv across */
247	memcpy(ctx->iv, iv, 16);
248	}
249
250	/**
251	* Change a key for decryption.
252	*/
253	void AES_convert_key(AES_CTX *ctx)
254	{
255	int i;
256	uint32_t *k,w,t1,t2,t3,t4;
257
258	k = ctx->ks;
259	k += 4;
260
261	for (i= ctx->rounds*4; i > 4; i--)
262	{
263	w= *k;
264	w = inv_mix_col(w,t1,t2,t3,t4);
265	*k++ =w;
266	}
267	}
268
269	/**
270	* Encrypt a byte sequence (with a block size 16) using the AES cipher.
271	*/
272	void AES_cbc_encrypt(AES_CTX ctx, const uint8_t msg, uint8_t *out, int length)
273	{
274	int i;
275	uint32_t tin[4], tout[4], iv[4];
276
277	memcpy(iv, ctx->iv, AES_IV_SIZE);
278	for (i = 0; i < 4; i++)
279	tout[i] = ntohl(iv[i]);
280
281	for (length -= AES_BLOCKSIZE; length >= 0; length -= AES_BLOCKSIZE)
282	{
283	uint32_t msg_32[4];
284	uint32_t out_32[4];
285	memcpy(msg_32, msg, AES_BLOCKSIZE);
286	msg += AES_BLOCKSIZE;
287
288	for (i = 0; i < 4; i++)
289	tin[i] = ntohl(msg_32[i])^tout[i];
290
291	AES_encrypt(ctx, tin);
292
293	for (i = 0; i < 4; i++)
294	{
295	tout[i] = tin[i];
296	out_32[i] = htonl(tout[i]);
297	}
298
299	memcpy(out, out_32, AES_BLOCKSIZE);
300	out += AES_BLOCKSIZE;
301	}
302
303	for (i = 0; i < 4; i++)
304	iv[i] = htonl(tout[i]);
305	memcpy(ctx->iv, iv, AES_IV_SIZE);
306	}
307
308	/**
309	* Decrypt a byte sequence (with a block size 16) using the AES cipher.
310	*/
311	void AES_cbc_decrypt(AES_CTX ctx, const uint8_t msg, uint8_t *out, int length)
312	{
313	int i;
314	uint32_t tin[4], xor[4], tout[4], data[4], iv[4];
315
316	memcpy(iv, ctx->iv, AES_IV_SIZE);
317	for (i = 0; i < 4; i++)
318	xor[i] = ntohl(iv[i]);
319
320	for (length -= 16; length >= 0; length -= 16)
321	{
322	uint32_t msg_32[4];
323	uint32_t out_32[4];
324	memcpy(msg_32, msg, AES_BLOCKSIZE);
325	msg += AES_BLOCKSIZE;
326
327	for (i = 0; i < 4; i++)
328	{
329	tin[i] = ntohl(msg_32[i]);
330	data[i] = tin[i];
331	}
332
333	AES_decrypt(ctx, data);
334
335	for (i = 0; i < 4; i++)
336	{
337	tout[i] = data[i]^xor[i];
338	xor[i] = tin[i];
339	out_32[i] = htonl(tout[i]);
340	}
341
342	memcpy(out, out_32, AES_BLOCKSIZE);
343	out += AES_BLOCKSIZE;
344	}
345
346	for (i = 0; i < 4; i++)
347	iv[i] = htonl(xor[i]);
348	memcpy(ctx->iv, iv, AES_IV_SIZE);
349	}
350
351	/**
352	* Encrypt a single block (16 bytes) of data
353	*/
354	static void AES_encrypt(const AES_CTX ctx, uint32_t data)
355	{
356	/* To make this code smaller, generate the sbox entries on the fly.
357	* This will have a really heavy effect upon performance.
358	*/
359	uint32_t tmp[4];
360	uint32_t tmp1, old_a0, a0, a1, a2, a3, row;
361	int curr_rnd;
362	int rounds = ctx->rounds;
363	const uint32_t *k = ctx->ks;
364
365	/* Pre-round key addition */
366	for (row = 0; row < 4; row++)
367	data[row] ^= *(k++);
368
369	/* Encrypt one block. */
370	for (curr_rnd = 0; curr_rnd < rounds; curr_rnd++)
371	{
372	/* Perform ByteSub and ShiftRow operations together */
373	for (row = 0; row < 4; row++)
374	{
375	a0 = (uint32_t)aes_sbox[(data[row%4]>>24)&0xFF];
376	a1 = (uint32_t)aes_sbox[(data[(row+1)%4]>>16)&0xFF];
377	a2 = (uint32_t)aes_sbox[(data[(row+2)%4]>>8)&0xFF];
378	a3 = (uint32_t)aes_sbox[(data[(row+3)%4])&0xFF];
379
380	/* Perform MixColumn iff not last round */
381	if (curr_rnd < (rounds - 1))
382	{
383	tmp1 = a0 ^ a1 ^ a2 ^ a3;
384	old_a0 = a0;
385	a0 ^= tmp1 ^ AES_xtime(a0 ^ a1);
386	a1 ^= tmp1 ^ AES_xtime(a1 ^ a2);
387	a2 ^= tmp1 ^ AES_xtime(a2 ^ a3);
388	a3 ^= tmp1 ^ AES_xtime(a3 ^ old_a0);
389	}
390
391	tmp[row] = ((a0 << 24) \| (a1 << 16) \| (a2 << 8) \| a3);
392	}
393
394	/* KeyAddition - note that it is vital that this loop is separate from
395	the MixColumn operation, which must be atomic...*/
396	for (row = 0; row < 4; row++)
397	data[row] = tmp[row] ^ *(k++);
398	}
399	}
400
401	/**
402	* Decrypt a single block (16 bytes) of data
403	*/
404	static void AES_decrypt(const AES_CTX ctx, uint32_t data)
405	{
406	uint32_t tmp[4];
407	uint32_t xt0,xt1,xt2,xt3,xt4,xt5,xt6;
408	uint32_t a0, a1, a2, a3, row;
409	int curr_rnd;
410	int rounds = ctx->rounds;
411	const uint32_t k = ctx->ks + ((rounds+1)4);
412
413	/* pre-round key addition */
414	for (row=4; row > 0;row--)
415	data[row-1] ^= *(--k);
416
417	/* Decrypt one block */
418	for (curr_rnd = 0; curr_rnd < rounds; curr_rnd++)
419	{
420	/* Perform ByteSub and ShiftRow operations together */
421	for (row = 4; row > 0; row--)
422	{
423	a0 = aes_isbox[(data[(row+3)%4]>>24)&0xFF];
424	a1 = aes_isbox[(data[(row+2)%4]>>16)&0xFF];
425	a2 = aes_isbox[(data[(row+1)%4]>>8)&0xFF];
426	a3 = aes_isbox[(data[row%4])&0xFF];
427
428	/* Perform MixColumn iff not last round */
429	if (curr_rnd<(rounds-1))
430	{
431	/* The MDS cofefficients (0x09, 0x0B, 0x0D, 0x0E)
432	are quite large compared to encryption; this
433	operation slows decryption down noticeably. */
434	xt0 = AES_xtime(a0^a1);
435	xt1 = AES_xtime(a1^a2);
436	xt2 = AES_xtime(a2^a3);
437	xt3 = AES_xtime(a3^a0);
438	xt4 = AES_xtime(xt0^xt1);
439	xt5 = AES_xtime(xt1^xt2);
440	xt6 = AES_xtime(xt4^xt5);
441
442	xt0 ^= a1^a2^a3^xt4^xt6;
443	xt1 ^= a0^a2^a3^xt5^xt6;
444	xt2 ^= a0^a1^a3^xt4^xt6;
445	xt3 ^= a0^a1^a2^xt5^xt6;
446	tmp[row-1] = ((xt0<<24)\|(xt1<<16)\|(xt2<<8)\|xt3);
447	}
448	else
449	tmp[row-1] = ((a0<<24)\|(a1<<16)\|(a2<<8)\|a3);
450	}
451
452	for (row = 4; row > 0; row--)
453	data[row-1] = tmp[row-1] ^ *(--k);
454	}
455	}
456
457	#endif