
/* This is an independent implementation of the DFC encryption      */
/* algorithm designed by a team at CNRS and France Telecom and      */
/* submitted as a candidate in the US NIST Advanced Encryption      */
/* Standard (AES) programme.                                        */
/*                                                                  */
/* Copyright in this implementation is held by Dr B R Gladman but   */
/* I hereby give permission for its free direct or derivative use   */
/* subject to acknowledgment of its origin and compliance with any  */
/* constraints that the designers of DFC place on the use of this   */
/* algorithm.                                                       */
/*                                                                  */
/* This implementation is intended as a reference implementation    */
/* and has not been optimised for speed (however, a small assembler */
/* coded section is used to improve the speed of multiple length    */
/* multiplication and addition).                                    */
/*                                                                  */
/* My thanks go to Serge Vaudenay of the Ecole Normale Superieure   */
/* for providing test vectors. This implementation has also been    */
/* tested with an independent implementation by Dr Russell Bradford */
/* (Department of Mathematical Sciences, University of Bath, Bath,  */
/* UK) and checks out.   My thanks go to Russell for his help in    */
/* comparing our implementations and finding bugs (and for help in  */
/* resolving 'endian' issues before test vectors became available). */
/*                                                                  */
/* Dr Brian Gladman (gladman@seven77.demon.co.uk) 27th July 1998    */
/*                                                                  */

/* The EES string is as follows (the abstract contains an error in 
   the last line of this sequence which changes KC and KD):

    0xb7e15162, 0x8aed2a6a, 0xbf715880, 0x9cf4f3c7, 
    0x62e7160f, 0x38b4da56, 0xa784d904, 0x5190cfef, 
    0x324e7738, 0x926cfbe5, 0xf4bf8d8d, 0x8c31d763,
    0xda06c80a, 0xbb1185eb, 0x4f7c7b57, 0x57f59584, 

    0x90cfd47d, 0x7c19bb42, 0x158d9554, 0xf7b46bce, 
    0xd55c4d79, 0xfd5f24d6, 0x613c31c3, 0x839a2ddf,
    0x8a9a276b, 0xcfbfa1c8, 0x77c56284, 0xdab79cd4, 
    0xc2b3293d, 0x20e9e5ea, 0xf02ac60a, 0xcc93ed87, 

    0x4422a52e, 0xcb238fee, 0xe5ab6add, 0x835fd1a0,
    0x753d0a8f, 0x78e537d2, 0xb95bb79d, 0x8dcaec64, 
    0x2c1e9f23, 0xb829b5c2, 0x780bf387, 0x37df8bb3, 
    0x00d01334, 0xa0d0bd86, 0x45cbfa73, 0xa6160ffe,

    0x393c48cb, 0xbbca060f, 0x0ff8ec6d, 0x31beb5cc, 
    0xeed7f2f0, 0xbb088017, 0x163bc60d, 0xf45a0ecb, 
    0x1bcd289b, 0x06cbbfea, 0x21ad08e1, 0x847f3f73,
    0x78d56ced, 0x94640d6e, 0xf0d3d37b, 0xe67008e1, 
    
    0x86d1bf27, 0x5b9b241d, 0xeb64749a, 0x47dfdfb9, 

    Where:

    EES = RT(0) | RT(1) | ... | RT(63) | KD | KC

    Note that the abstract describing DFC is written 
    in big endian notation with the most significant 
    digits of a sequence of digits placed at the low
    index positions in arrays. This format is used
    here and is only converted to machine format at
    the point that maths is done on any numbers in 
    the round function.
    
    The key input is thus treated as an array of 32 
    bit words numbered from 0..3, 0..5 or 0..7 
    depending on key length.  The first (leftmost) 
    bit of this key string as defined in the DFC 
    abstract is the most significant bit of word 0 
    and the rightmost bit of this string is the least 
    signicant bit of the highest numbered key word.

    The input and output blocks for the cipher are 
    also treated as arrays of 32 bit words numbered
    from 0..3.  The most significant bit of word 0 is
    the 1st (leftmost) bit of the 128 bit input string 
    and the least significant bit of word 3 is the 
    last (rightmost) bit.

    Here are some key, input and encrypted output 
    values for reference purposes:
    -----------------------------------------------
    key :00..03:00000000 00000000 00000000 00000000 
    inpt:00..03:00000000 00000000 00000000 00000000 
    oupt:00..03:f12dd230 7064907f 3b61da94 1efb2184 
    -----------------------------------------------
    key :00..03:00000000 00000000 00000000 00000000 
    key :04..05:00000000 00000000 
    inpt:00..03:00000000 00000000 00000000 00000000 
    oupt:00..03:7af7efb2 1aa1c246 fa6a4d6b c24fe909 
    -----------------------------------------------
    key :00..03:00000000 00000000 00000000 00000000 
    key :04..07:00000000 00000000 00000000 00000000 
    inpt:00..03:00000000 00000000 00000000 00000000 
    oupt:00..03:0205d713 3ed37cee 091c36f2 4a74592a 
    -----------------------------------------------
    key :00..03:00000000 00000000 00000000 00000000 
    inpt:00..03:00010203 04050607 08090a0b 0c0d0e0f 
    oupt:00..03:63042070 9e777ff6 cb1c6523 1362c5e3 
    -----------------------------------------------
    key :00..03:00000000 00000000 00000000 00000000 
    key :04..05:00000000 00000000 
    inpt:00..03:00010203 04050607 08090a0b 0c0d0e0f 
    oupt:00..03:7072a955 ccedbc49 03417213 c051f7ba 
    -----------------------------------------------
    key :00..03:00000000 00000000 00000000 00000000 
    key :04..07:00000000 00000000 00000000 00000000 
    inpt:00..03:00010203 04050607 08090a0b 0c0d0e0f 
    oupt:00..03:10fdbad1 0fa262f6 4147a00f 4306b11c 
    -----------------------------------------------
    key :00..03:00010203 04050607 08090a0b 0c0d0e0f 
    inpt:00..03:00000000 00000000 00000000 00000000 
    oupt:00..03:f33e6473 dc94928e c887ed51 950f1b35 
    -----------------------------------------------
    key :00..03:00010203 04050607 08090a0b f4f5f6f7 
    key :04..05:f8f9fafb fcfdfeff 
    inpt:00..03:00000000 00000000 00000000 00000000 
    oupt:00..03:cf0ce273 b1daae5d 1687caa4 003663d4 
    -----------------------------------------------
    key :00..03:00010203 04050607 08090a0b 0c0d0e0f 
    key :04..07:f0f1f2f3 f4f5f6f7 f8f9fafb fcfdfeff 
    inpt:00..03:00000000 00000000 00000000 00000000 
    oupt:00..03:5ceeeb6b 73b79f6b 5cc20f41 96b1d7b3 
    -----------------------------------------------
    key :00..03:00010203 04050607 08090a0b 0c0d0e0f 
    inpt:00..03:00010203 04050607 08090a0b 0c0d0e0f 
    oupt:00..03:dc853213 a1e283ca 3afc6f04 e7c1bf08 
    -----------------------------------------------
    key :00..03:00010203 04050607 08090a0b f4f5f6f7 
    key :04..05:f8f9fafb fcfdfeff 
    inpt:00..03:00010203 04050607 08090a0b 0c0d0e0f 
    oupt:00..03:94ca0910 5c4fd5c6 51297e50 3fdd47db 
    -----------------------------------------------
    key :00..03:00010203 04050607 08090a0b 0c0d0e0f 
    key :04..07:f0f1f2f3 f4f5f6f7 f8f9fafb fcfdfeff 
    inpt:00..03:00010203 04050607 08090a0b 0c0d0e0f 
    oupt:00..03:1c042426 df34fcec 01eab9a7 5e5c15ec 
    -----------------------------------------------
    key :00..03:01234567 89012345 67890123 45678901 
    inpt:00..03:00000000 00000000 00000000 00000000 
    oupt:00..03:bb46bb6a c0093c1d f5675766 16077eef 
    -----------------------------------------------
    key :00..03:01234567 89012345 67890123 45678901 
    key :04..05:23456789 01234567 
    inpt:00..03:00000000 00000000 00000000 00000000 
    oupt:00..03:6a650210 5c23c62a b01e6b32 4dcdf664 
    -----------------------------------------------
    key :00..03:01234567 89012345 67890123 45678901 
    key :04..07:23456789 01234567 89012345 67890123 
    inpt:00..03:00000000 00000000 00000000 00000000 
    oupt:00..03:68bf42dc f20d4ec7 aa081c03 822c6dba 
    -----------------------------------------------
*/

#include "../std_defs.h"

static char *alg_name = "DFC";

char *cipher_name()
{
    return alg_name;
};

/* The following arrays are all stored in big endian    */
/* format with 32 bit words at lower array positions    */
/* being more significant in multi-word values          */

u4byte rt64[64] = 
{
    0xb7e15162, 0x8aed2a6a, 0xbf715880, 0x9cf4f3c7, 
    0x62e7160f, 0x38b4da56, 0xa784d904, 0x5190cfef, 
    0x324e7738, 0x926cfbe5, 0xf4bf8d8d, 0x8c31d763,
    0xda06c80a, 0xbb1185eb, 0x4f7c7b57, 0x57f59584, 

    0x90cfd47d, 0x7c19bb42, 0x158d9554, 0xf7b46bce, 
    0xd55c4d79, 0xfd5f24d6, 0x613c31c3, 0x839a2ddf,
    0x8a9a276b, 0xcfbfa1c8, 0x77c56284, 0xdab79cd4, 
    0xc2b3293d, 0x20e9e5ea, 0xf02ac60a, 0xcc93ed87, 

    0x4422a52e, 0xcb238fee, 0xe5ab6add, 0x835fd1a0,
    0x753d0a8f, 0x78e537d2, 0xb95bb79d, 0x8dcaec64, 
    0x2c1e9f23, 0xb829b5c2, 0x780bf387, 0x37df8bb3, 
    0x00d01334, 0xa0d0bd86, 0x45cbfa73, 0xa6160ffe,

    0x393c48cb, 0xbbca060f, 0x0ff8ec6d, 0x31beb5cc, 
    0xeed7f2f0, 0xbb088017, 0x163bc60d, 0xf45a0ecb, 
    0x1bcd289b, 0x06cbbfea, 0x21ad08e1, 0x847f3f73,
    0x78d56ced, 0x94640d6e, 0xf0d3d37b, 0xe67008e1, 
};

u4byte  kc = 0xeb64749a;

u4byte  kd2[2] = 
{
    0x86d1bf27, 0x5b9b241d  
};

u4byte  ka2[6] = 
{
    0xb7e15162, 0x8aed2a6a, 
    0xbf715880, 0x9cf4f3c7, 
    0x62e7160f, 0x38b4da56, 
};

u4byte  kb2[6] =
{
    0xa784d904, 0x5190cfef, 
    0x324e7738, 0x926cfbe5, 
    0xf4bf8d8d, 0x8c31d763,
};

u4byte  ks8[8] = 
{   0xda06c80a, 0xbb1185eb, 0x4f7c7b57, 0x57f59584, 
    0x90cfd47d, 0x7c19bb42, 0x158d9554, 0xf7b46bce,
};

u4byte l_key[32];

/* The following maths functions use data in little endian format   */

/* multiply two 32 bit words to give a 64 bit result and add it to  */
/* the accumulator (acc) propagating any carries produced           */

#if(0)

void madd(u4byte acc[2], u4byte x[1], u4byte y[1])
{   u4byte  c, s, t;
    u2byte  r[4];

    /* compute upper and lower 32 bit products      */

    *(u4byte*)r = (u4byte)((u2byte*)x)[0] * ((u2byte*)y)[0]; 
        
    *(u4byte*)(r + 2) = (u4byte)((u2byte*)x)[1] * ((u2byte*)y)[1]; 

    /* compute and add intermediate products to r   */

    s = (u4byte)((u2byte*)x)[0] * ((u2byte*)y)[1]; 

    t = (u4byte)((u2byte*)x)[1] * ((u2byte*)y)[0]; 

    s += t; c = (s < t ? 1 : 0);

    t = *(u4byte*)(r + 1); 

    s += t; c += (s < t ? 1 : 0);

    *(u4byte*)(r + 1) = s;

    *(r + 3) += c;

    /* now add it to acc with carry propagation     */

    acc[0] += *(u4byte*)r; c = (acc[0] < *(u4byte*)r ? 1 : 0);

    acc[1] += (t = *(u4byte*)(r + 2)) + c; 
    
    c = (acc[1] < t ? 1 : (acc[1] == t ? c : 0));

    if(c && !++acc[2])

        ++acc[3];
};

#else

/* MFC inline assembly code speedup of madd. Note that  */
/* while Microsoft C++ has a 64 bit integer type, this  */
/* is (stupidly) only available in signed form and is   */
/* hence messy to use here                              */

void madd(u4byte acc[4], u4byte x[1], u4byte y[1])
{   __asm   {               
    __asm   mov ecx,x
    __asm   mov edx,y   
    __asm   mov eax,[ecx]
    __asm   mov ecx,[edx]
    __asm   mul ecx         
    __asm   mov ebx,acc     
    __asm   xor ecx,ecx     
    __asm   add [ebx],eax   
    __asm   adc [ebx+4],edx 
    __asm   adc [ebx+8],ecx
    __asm   adc [ebx+12],ecx
    }
};

#endif

/* Where necessary this is where conversion from big endian to  */
/* little endian format is performed.  Since all the maths is   */
/* little endian care is needed when 64 bit blocks are being    */
/* used to get them in the right order by reversing the order   */
/* in which these are stored. This applies to the key array     */
/* which gives the two values A and B and to the constant KD.   */
/* Since the input and output blocks are big endian we also     */
/* have to invert the order of the 32 bit words in the 64 bit   */
/* blocks being processed.                                      */ 

void r_fun(u4byte outp[2], const u4byte inp[2], const u4byte key[4])
{   u4byte acc4[6], t4[6], b, t;

    /* load the little endian accumulator with B - that is:		*/
	/* acc[0] = ls = key[3]) and acc[1] = ms = key[2]			*/

    acc4[0] = key[3]; acc4[1] = key[2]; acc4[2] = acc4[3] = 0;
    
    /* 64 bit multiply of input x (ls = inp[1], ms = inp[1])	*/
	/* and A (ls = key[1], ms = key[0]). Ensure that little		*/
	/* endian order is used for this maths						*/

    madd(acc4, inp + 1, key + 1);   /* least significant digits */
    madd(acc4 + 1, inp, key + 1); 
    madd(acc4 + 1, inp + 1, key);
    madd(acc4 + 2, inp, key);       /* most significant digits	*/

    /* we need the value in the accumulator mod 2^64 + 13 so if */
    /* the accumulator value is hi * 2^64 + lo we need to find  */
    /* a k value such that r = hi * 2^64 + lo - k * (2^64 + 13) */
    /* is 0 <= r < 2^64 + 13.  We can see that k will be close  */
    /* to hi in value - it may equal hi but will not be greater */
    /* and we can let k = hi - e with e >= 0 so that r is given */
    /* by r = e * (2^64 + 13) + lo - 13 * hi. If we compute the */
    /* lo - 13 * hi value, the overflow into the top 64 bits of */
    /* the accumulator has to be 'zeroed' by the e * (2^64 + 13)*/
    /* term and this sets the e value (in fact such an overlow  */
    /* is only removed when the lower word is higher than 12).	*/

    t4[0] = t4[1] = t4[2] = 0; b = 13;

    madd(t4, acc4 + 2, &b); madd(t4 + 1, acc4 + 3, &b);

    /* calculate lo - 13 * hi in acc4   */

    t = acc4[0]; acc4[0] -= t4[0]; b = (acc4[0] > t ? 1 : 0);

    t = acc4[1]; acc4[1] -= t4[1] + b;
    b = (acc4[1] > t ? 1 : (acc4[1] == t ? b : 0));

    b = 13 * (t4[2] + b);

    if(((acc4[0] += b) < b) && !(++acc4[1]))
    {
        if(acc4[0] > 12)

            acc4[0] -= 13;
    }

    /* do the confusion permutation (t4 is big endian format)	*/

    t4[1] = acc4[1] ^ kc; t4[0] = acc4[0] ^ rt64[acc4[1] >> 26];  
    
    t4[0] += kd2[0] + ((t4[1] += kd2[1]) < kd2[1] ? 1 : 0);

	outp[0] ^= t4[0]; outp[1] ^= t4[1]; 
};

u4byte *set_key(u4byte key_blk[], u4byte key_len)
{   u4byte  i, lk[32], rk[4];

    for(i = 0; i < key_len; ++i)

        lk[i] = key_blk[i]; // key_len - 1 - i];

    /* pad the key with the KS array            */

    for(i = 0; i < 8 - key_len; ++i)    /* K|KS */

        lk[i + key_len] = ks8[i];

    /* do the reordering of the key parameters  */
    /* the OAP[1]|OBP[1]|OAP[2]... sequence is  */
    /* at lk[0]... and the other at lk[16]...   */
    
    lk[18] = lk[5]; lk[19] = lk[2]; /* EBP */ 
    lk[16] = lk[1]; lk[17] = lk[6]; /* EAP */
    lk[ 2] = lk[4]; lk[ 3] = lk[3]; /* OBP */
    lk[ 0] = lk[0]; lk[ 1] = lk[7]; /* OAP */

    /* create other elements using KA and KB    */

    for(i = 0; i < 6; i += 2)
    {
        lk[i + i +   4] = lk[ 0] ^ ka2[i];      /* OAP[i] ms */
        lk[i + i +   5] = lk[ 1] ^ ka2[i + 1];  /* OAP[i] ls */
        lk[i + i +   6] = lk[ 2] ^ kb2[i];      /* OBP[i] ms */
        lk[i + i +   7] = lk[ 3] ^ kb2[i + 1];  /* OBP[i] ls */
        lk[i + i +  20] = lk[16] ^ ka2[i];      /* EAP[i] ms */
        lk[i + i +  21] = lk[17] ^ ka2[i + 1];  /* EAP[i] ls */
        lk[i + i +  22] = lk[18] ^ kb2[i];      /* EBP[i] ms */
        lk[i + i +  23] = lk[19] ^ kb2[i + 1];  /* EBP[i] ls */
    }

    rk[0] = rk[1] = rk[2] = rk[3] = 0;

    /* do the 4 round key mixing encryption     */

    for(i = 0; i < 32; i += 8)
    {
        r_fun(rk, rk + 2, lk);      /*  R2|R1   */
        r_fun(rk + 2, rk, lk +  4); /*  R2|R3   */
        r_fun(rk, rk + 2, lk +  8); /*  R4|R3   */
        r_fun(rk + 2, rk, lk + 12); /*  R4|R5   */

        /* keep key in big endian format with   */
        /* the most significant 32 bit words    */
        /* first (lowest) in the key schedule   */
        /* - note that the upper and lower 64   */
        /* bit blocks are in inverse order at   */
        /* this point in the loop               */

        l_key[i + 0] = rk[2]; l_key[i + 1] = rk[3]; 
        l_key[i + 2] = rk[0]; l_key[i + 3] = rk[1]; 

        r_fun(rk + 2, rk, lk + 16); /*  R1|R2   */
        r_fun(rk, rk + 2, lk + 20); /*  R3|R2   */
        r_fun(rk + 2, rk, lk + 24); /*  R3|R4   */  
        r_fun(rk, rk + 2, lk + 28); /*  R5|R4   */

        l_key[i + 4] = rk[0]; l_key[i + 5] = rk[1]; 
        l_key[i + 6] = rk[2]; l_key[i + 7] = rk[3];
    }
    
    return l_key;
};

void encrypt(u16byte in_blk, u16byte out_blk)
{   u16byte  blk;

    /* the input/output format is big endian -  */
    /* any reversals needed are performed when  */
    /* maths is done in the round function      */

    blk[0] = in_blk[0]; blk[1] = in_blk[1];
    blk[2] = in_blk[2]; blk[3] = in_blk[3];

    r_fun(blk, blk + 2, l_key +  0); /*  R2|R1  */ 
    r_fun(blk + 2, blk, l_key +  4); /*  R2|R3  */
    r_fun(blk, blk + 2, l_key +  8); /*  R4|R3  */
    r_fun(blk + 2, blk, l_key + 12); /*  R4|R5  */
    r_fun(blk, blk + 2, l_key + 16); /*  R6|R5  */
    r_fun(blk + 2, blk, l_key + 20); /*  R6|R7  */
    r_fun(blk, blk + 2, l_key + 24); /*  R8|R7  */
    r_fun(blk + 2, blk, l_key + 28); /*  R8|R9  */

    /* swap order to obtain the result R9|R8    */

    out_blk[0] = blk[2]; out_blk[1] = blk[3];
    out_blk[2] = blk[0]; out_blk[3] = blk[1];
};

void decrypt(u16byte in_blk, u16byte out_blk)
{   u16byte  blk;

    /* the input/output format is big endian -  */
    /* any reversals needed are performed when  */
    /* maths is done in the round function      */

    blk[0] = in_blk[0]; blk[1] = in_blk[1];
    blk[2] = in_blk[2]; blk[3] = in_blk[3];

    r_fun(blk, blk + 2, l_key + 28); /*  R7|R8  */
    r_fun(blk + 2, blk, l_key + 24); /*  R7|R6  */
    r_fun(blk, blk + 2, l_key + 20); /*  R5|R6  */
    r_fun(blk + 2, blk, l_key + 16); /*  R5|R4  */
    r_fun(blk, blk + 2, l_key + 12); /*  R3|R4  */
    r_fun(blk + 2, blk, l_key +  8); /*  R3|R2  */
    r_fun(blk, blk + 2, l_key +  4); /*  R1|R2  */
    r_fun(blk + 2, blk, l_key     ); /*  R1|R0  */ 

    /* swap order to obtain the result R1|R0    */

    out_blk[0] = blk[2]; out_blk[1] = blk[3];
    out_blk[2] = blk[0]; out_blk[3] = blk[1];   
};
