ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Crypt-Twofish2/twofish.c
Revision: 1.1
Committed: Sat Sep 6 22:10:54 2003 UTC (21 years, 2 months ago) by root
Content type: text/plain
Branch: MAIN
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 /***************************************************************************
2     TWOFISH2.C -- Optimized C API calls for TWOFISH AES submission
3    
4     Submitters:
5     Bruce Schneier, Counterpane Systems
6     Doug Whiting, Hi/fn
7     John Kelsey, Counterpane Systems
8     Chris Hall, Counterpane Systems
9     David Wagner, UC Berkeley
10    
11     Code Author: Doug Whiting, Hi/fn
12    
13     Version 1.00 April 1998
14    
15     Copyright 1998, Hi/fn and Counterpane Systems. All rights reserved.
16    
17     Notes:
18     * Optimized version
19     * Tab size is set to 4 characters in this file
20    
21     ***************************************************************************/
22     #include "aes.h"
23     #include "table.h"
24    
25     #include <memory.h>
26     /*#include <assert.h>*/
27    
28     #if defined(min_key) && !defined(MIN_KEY) /* map lower-case build switches onto the upper-case names tested below */
29     #define MIN_KEY 1 /* toupper() */
30     #elif defined(part_key) && !defined(PART_KEY)
31     #define PART_KEY 1
32     #elif defined(zero_key) && !defined(ZERO_KEY)
33     #define ZERO_KEY 1
34     #endif
35    
36    
37     #ifdef USE_ASM
38     extern int useAsm; /* ok to use ASM code? Bit mask: reKey tests (useAsm & 4), blockEncrypt tests (useAsm & 1) */
39    
        /* Signatures of the externally supplied assembly entry points. */
40     typedef int cdecl CipherProc
41     (cipherInstance *cipher, keyInstance *key,BYTE *input,int inputLen,BYTE *outBuffer);
42     typedef int cdecl KeySetupProc(keyInstance *key);
43    
44     extern CipherProc *blockEncrypt_86; /* ptr to ASM functions */
45     extern CipherProc *blockDecrypt_86;
46     extern KeySetupProc *reKey_86; /* called from reKey() when (useAsm & 4) is set */
47     extern DWORD cdecl TwofishAsmCodeSize(void);
48     #endif
49    
50     /*
51     +*****************************************************************************
52     * Constants/Macros/Tables
53     -****************************************************************************/
54    
55     #define CONST /* help syntax from C++, NOP here (expands to nothing, so CONST objects are writable) */
56    
57     CONST fullSbox MDStab; /* not actually const. Initialized ONE time, by BuildMDS() */
58     int needToBuildMDS=1; /* is MDStab initialized yet? Cleared by BuildMDS(), tested by reKey() */
59    
60     #define BIG_TAB 0 /* nonzero --> BuildMDS() also fills bigTab[], which reKey() uses for 128-bit keys */
61    
62     #if BIG_TAB
63     BYTE bigTab[4][256][256]; /* pre-computed S-box */
64     #endif
65    
66     /* number of rounds for various key sizes: 128, 192, 256 */
67     /* (ignored for now in optimized code!) */
        /* makeKey() indexes this table with (keyLen-1)/64, so entry 0 (zero rounds) */
        /* is what a key of 64 bits or fewer selects. */
68     CONST int numRounds[4]= {0,ROUNDS_128,ROUNDS_192,ROUNDS_256};
69    
70     #if REENTRANT
71     #define _sBox_ key->sBox8x32 /* per-key table: needs a variable named `key` in scope at each use */
72     #else
73     static fullSbox _sBox_; /* permuted MDStab based on keys; one file-scope table shared by every keyInstance */
74     #endif
75     #define _sBox8_(N) (((BYTE *) _sBox_) + (N)*256) /* byte pointer to the N-th 256-byte slice of _sBox_ */
77     /*------- see what level of S-box precomputation we need to do -----*/
        /* Exactly one of the four branches below is compiled. Each defines the      */
        /* round-function lookup macro(s) plus GetSboxKey, and (except ZERO_KEY)     */
        /* sbSet, the macro reKey() uses to store one precomputed S-box entry.       */
78     #if defined(ZERO_KEY)
        /* No per-key S-box table: every lookup walks the full p8() chain using the  */
        /* local SKEY[] copy declared by GetSboxKey. One macro per key size.         */
79     #define MOD_STRING "(Zero S-box keying)"
80     #define Fe32_128(x,R) \
81     ( MDStab[0][p8(01)[p8(02)[_b(x,R )]^b0(SKEY[1])]^b0(SKEY[0])] ^ \
82     MDStab[1][p8(11)[p8(12)[_b(x,R+1)]^b1(SKEY[1])]^b1(SKEY[0])] ^ \
83     MDStab[2][p8(21)[p8(22)[_b(x,R+2)]^b2(SKEY[1])]^b2(SKEY[0])] ^ \
84     MDStab[3][p8(31)[p8(32)[_b(x,R+3)]^b3(SKEY[1])]^b3(SKEY[0])] )
85     #define Fe32_192(x,R) \
86     ( MDStab[0][p8(01)[p8(02)[p8(03)[_b(x,R )]^b0(SKEY[2])]^b0(SKEY[1])]^b0(SKEY[0])] ^ \
87     MDStab[1][p8(11)[p8(12)[p8(13)[_b(x,R+1)]^b1(SKEY[2])]^b1(SKEY[1])]^b1(SKEY[0])] ^ \
88     MDStab[2][p8(21)[p8(22)[p8(23)[_b(x,R+2)]^b2(SKEY[2])]^b2(SKEY[1])]^b2(SKEY[0])] ^ \
89     MDStab[3][p8(31)[p8(32)[p8(33)[_b(x,R+3)]^b3(SKEY[2])]^b3(SKEY[1])]^b3(SKEY[0])] )
90     #define Fe32_256(x,R) \
91     ( MDStab[0][p8(01)[p8(02)[p8(03)[p8(04)[_b(x,R )]^b0(SKEY[3])]^b0(SKEY[2])]^b0(SKEY[1])]^b0(SKEY[0])] ^ \
92     MDStab[1][p8(11)[p8(12)[p8(13)[p8(14)[_b(x,R+1)]^b1(SKEY[3])]^b1(SKEY[2])]^b1(SKEY[1])]^b1(SKEY[0])] ^ \
93     MDStab[2][p8(21)[p8(22)[p8(23)[p8(24)[_b(x,R+2)]^b2(SKEY[3])]^b2(SKEY[2])]^b2(SKEY[1])]^b2(SKEY[0])] ^ \
94     MDStab[3][p8(31)[p8(32)[p8(33)[p8(34)[_b(x,R+3)]^b3(SKEY[3])]^b3(SKEY[2])]^b3(SKEY[1])]^b3(SKEY[0])] )
95    
        /* Expands to a declaration followed by a statement, so it must be the last  */
        /* item of a declaration list, and needs a variable named `key` in scope.    */
96     #define GetSboxKey DWORD SKEY[4]; /* local copy */ \
97     memcpy(SKEY,key->sboxKeys,sizeof(SKEY));
98     /*----------------------------------------------------------------*/
99     #elif defined(MIN_KEY)
        /* _sBox8_(N) holds a partially keyed 8-bit permutation; the final p8() stage */
        /* and the xor with the SKEY0 byte are still done at lookup time.            */
100     #define MOD_STRING "(Minimal keying)"
101     #define Fe32_(x,R)(MDStab[0][p8(01)[_sBox8_(0)[_b(x,R )]] ^ b0(SKEY0)] ^ \
102     MDStab[1][p8(11)[_sBox8_(1)[_b(x,R+1)]] ^ b1(SKEY0)] ^ \
103     MDStab[2][p8(21)[_sBox8_(2)[_b(x,R+2)]] ^ b2(SKEY0)] ^ \
104     MDStab[3][p8(31)[_sBox8_(3)[_b(x,R+3)]] ^ b3(SKEY0)])
105     #define sbSet(N,i,J,v) { _sBox8_(N)[i+J] = v; }
106     #define GetSboxKey DWORD SKEY0 = key->sboxKeys[0] /* local copy */
107     /*----------------------------------------------------------------*/
108     #elif defined(PART_KEY)
        /* _sBox8_(N) holds the fully keyed 8-bit permutation; only the MDStab       */
        /* lookup remains at encryption time.                                        */
109     #define MOD_STRING "(Partial keying)"
110     #define Fe32_(x,R)(MDStab[0][_sBox8_(0)[_b(x,R )]] ^ \
111     MDStab[1][_sBox8_(1)[_b(x,R+1)]] ^ \
112     MDStab[2][_sBox8_(2)[_b(x,R+2)]] ^ \
113     MDStab[3][_sBox8_(3)[_b(x,R+3)]])
114     #define sbSet(N,i,J,v) { _sBox8_(N)[i+J] = v; }
115     #define GetSboxKey /* nothing to copy in this mode */
116     /*----------------------------------------------------------------*/
117     #else /* default is FULL_KEY */
118     #ifndef FULL_KEY
119     #define FULL_KEY 1
120     #endif
121     #if BIG_TAB
122     #define TAB_STR " (Big table)"
123     #else
124     #define TAB_STR /* empty: nothing appended to MOD_STRING */
125     #endif
126     #ifdef COMPILE_KEY
127     #define MOD_STRING "(Compiled subkeys)" TAB_STR
128     #else
129     #define MOD_STRING "(Full keying)" TAB_STR
130     #endif
131     /* Fe32_ does a full S-box + MDS lookup. Need to #define _sBox_ before use.
132     Note that we "interleave" 0,1, and 2,3 to avoid cache bank collisions
133     in optimized assembly language.
134     */
135     #define Fe32_(x,R) (_sBox_[0][2*_b(x,R )] ^ _sBox_[0][2*_b(x,R+1)+1] ^ \
136     _sBox_[2][2*_b(x,R+2)] ^ _sBox_[2][2*_b(x,R+3)+1])
137     /* set a single S-box value, given the input byte */
        /* S-boxes 0/1 share row _sBox_[0] and 2/3 share row _sBox_[2] (index N&2);   */
        /* within a row the even slots belong to the even S-box, odd slots to the    */
        /* odd one (N&1). Arguments are pasted unparenthesized: pass simple tokens.  */
138     #define sbSet(N,i,J,v) { _sBox_[N&2][2*i+(N&1)+2*J]=MDStab[N][v]; }
139     #define GetSboxKey /* nothing to copy in this mode */
140     #endif
141    
142     /* macro(s) for debugging help */
143     #define CHECK_TABLE 0 /* nonzero --> compare against "slow" table: compiles f32() and the assert() self-check in reKey() */
144     #define VALIDATE_PARMS 0 /* disable for full speed; when 0 the argument checks in reKey/makeKey/cipherInit/blockEncrypt are compiled out */
145    
146     /* end of debug macros */
147    
148     #ifdef GetCodeSize
149     extern DWORD Here(DWORD x); /* return caller's address! */
150     DWORD TwofishCodeStart(void) { return Here(0); } /* address marker at the start of this file's code */
151     #endif
152    
153     /*
154     +*****************************************************************************
155     *
156     * Function Name: TableOp
157     *
158     * Function: Handle table use checking
159     *
160     * Arguments: op = what to do (see TAB_* defns in AES.H)
161     *
162     * Return: TRUE --> done (for TAB_QUERY)
163     *
164     * Notes: This routine is for use in generating the tables KAT file.
165     * For this optimized version, we don't actually track table usage,
166     * since it would make the macros incredibly ugly. Instead we just
167     * run for a fixed number of queries and then say we're done.
168     *
169     -****************************************************************************/
170     int TableOp(int op)
171     {
172     static int queryCnt=0;
173    
174     switch (op)
175     {
176     case TAB_DISABLE:
177     break;
178     case TAB_ENABLE:
179     break;
180     case TAB_RESET:
181     queryCnt=0;
182     break;
183     case TAB_QUERY:
184     queryCnt++;
185     if (queryCnt < TAB_MIN_QUERY)
186     return FALSE;
187     }
188     return TRUE;
189     }
190    
191    
192     #if CHECK_TABLE
193     /*
194     +*****************************************************************************
195     *
196     * Function Name: f32
197     *
198     * Function: Run four bytes through keyed S-boxes and apply MDS matrix
199     *
200     * Arguments: x = input to f function
201     * k32 = pointer to key dwords
202     * keyLen = total key length (k32 --> keyLen/2 bits)
203     *
204     * Return: The output of the keyed permutation applied to x.
205     *
206     * Notes:
207     * This function is a keyed 32-bit permutation. It is the major building
208     * block for the Twofish round function, including the four keyed 8x8
209     * permutations and the 4x4 MDS matrix multiply. This function is used
210     * both for generating round subkeys and within the round function on the
211     * block being encrypted.
212     *
213     * This version is fairly slow and pedagogical, although a smartcard would
214     * probably perform the operation exactly this way in firmware. For
215     * ultimate performance, the entire operation can be completed with four
216     * lookups into four 256x32-bit tables, with three dword xors.
217     *
218     * The MDS matrix is defined in TABLE.H. To multiply by Mij, just use the
219     * macro Mij(x).
220     *
        * Only compiled when CHECK_TABLE is nonzero; reKey() uses it there to
        * cross-check the optimized tables.
        *
221     -****************************************************************************/
222     DWORD f32(DWORD x,CONST DWORD *k32,int keyLen)
223     {
224     BYTE b[4];
225    
226     /* Run each byte thru 8x8 S-boxes, xoring with key byte at each stage. */
227     /* Note that each byte goes through a different combination of S-boxes.*/
228    
229     *((DWORD *)b) = Bswap(x); /* make b[0] = LSB, b[3] = MSB */
        /* Dispatch on the number of 64-bit key words, reduced mod 4 (so 4 -> 0). */
        /* There is no case for a value of 1 and no default: for such a keyLen    */
        /* the bytes reach the MDS multiply below without any S-box step.         */
230     switch (((keyLen + 63)/64) & 3)
231     {
232     case 0: /* 256 bits of key */
233     b[0] = p8(04)[b[0]] ^ b0(k32[3]);
234     b[1] = p8(14)[b[1]] ^ b1(k32[3]);
235     b[2] = p8(24)[b[2]] ^ b2(k32[3]);
236     b[3] = p8(34)[b[3]] ^ b3(k32[3]);
237     /* fall thru, having pre-processed b[0]..b[3] with k32[3] */
238     case 3: /* 192 bits of key */
239     b[0] = p8(03)[b[0]] ^ b0(k32[2]);
240     b[1] = p8(13)[b[1]] ^ b1(k32[2]);
241     b[2] = p8(23)[b[2]] ^ b2(k32[2]);
242     b[3] = p8(33)[b[3]] ^ b3(k32[2]);
243     /* fall thru, having pre-processed b[0]..b[3] with k32[2] */
244     case 2: /* 128 bits of key */
245     b[0] = p8(00)[p8(01)[p8(02)[b[0]] ^ b0(k32[1])] ^ b0(k32[0])];
246     b[1] = p8(10)[p8(11)[p8(12)[b[1]] ^ b1(k32[1])] ^ b1(k32[0])];
247     b[2] = p8(20)[p8(21)[p8(22)[b[2]] ^ b2(k32[1])] ^ b2(k32[0])];
248     b[3] = p8(30)[p8(31)[p8(32)[b[3]] ^ b3(k32[1])] ^ b3(k32[0])];
249     }
250    
251     /* Now perform the MDS matrix multiply inline. */
        /* Row i of the matrix produces output byte i (bits 8*i .. 8*i+7). */
252     return ((M00(b[0]) ^ M01(b[1]) ^ M02(b[2]) ^ M03(b[3])) ) ^
253     ((M10(b[0]) ^ M11(b[1]) ^ M12(b[2]) ^ M13(b[3])) << 8) ^
254     ((M20(b[0]) ^ M21(b[1]) ^ M22(b[2]) ^ M23(b[3])) << 16) ^
255     ((M30(b[0]) ^ M31(b[1]) ^ M32(b[2]) ^ M33(b[3])) << 24) ;
256     }
257     #endif /* CHECK_TABLE */
258    
259    
260     /*
261     +*****************************************************************************
262     *
263     * Function Name: RS_MDS_encode
264     *
265     * Function: Use (12,8) Reed-Solomon code over GF(256) to produce
266     * a key S-box dword from two key material dwords.
267     *
268     * Arguments: k0 = 1st dword
269     * k1 = 2nd dword
270     *
271     * Return: Remainder polynomial generated using RS code
272     *
273     * Notes:
274     * Since this computation is done only once per reKey per 64 bits of key,
275     * the performance impact of this routine is imperceptible. The RS code
276     * chosen has "simple" coefficients to allow smartcard/hardware implementation
277     * without lookup tables.
278     *
279     -****************************************************************************/
280     DWORD RS_MDS_Encode(DWORD k0,DWORD k1)
281     {
282     int i,j;
283     DWORD r;
284    
285     for (i=r=0;i<2;i++)
286     {
287     r ^= (i) ? k0 : k1; /* merge in 32 more key bits */
288     for (j=0;j<4;j++) /* shift one byte at a time */
289     RS_rem(r);
290     }
291     return r;
292     }
293    
294    
295     /*
296     +*****************************************************************************
297     *
298     * Function Name: BuildMDS
299     *
300     * Function: Initialize the MDStab array
301     *
302     * Arguments: None.
303     *
304     * Return: None.
305     *
306     * Notes:
307     * Here we precompute all the fixed MDS table. This only needs to be done
308     * one time at initialization, after which the table is "CONST".
309     *
310     -****************************************************************************/
311     void BuildMDS(void)
312     {
313     int i;
314     DWORD d; /* one MDStab entry, assembled a byte at a time via b0()..b3() */
315     BYTE m1[2],mX[2],mY[4]; /* per-i products, index 0 from P8x8[0], index 1 from P8x8[1]; only mY[0..1] are written here */
316    
317     for (i=0;i<256;i++)
318     {
319     m1[0]=P8x8[0][i]; /* compute all the matrix elements */
320     mX[0]=(BYTE) Mul_X(m1[0]);
321     mY[0]=(BYTE) Mul_Y(m1[0]);
322    
323     m1[1]=P8x8[1][i];
324     mX[1]=(BYTE) Mul_X(m1[1]);
325     mY[1]=(BYTE) Mul_Y(m1[1]);
326    
        /* From here to the matching #undef below, Mul_1/Mul_X/Mul_Y name the   */
        /* local arrays above instead of the multiply macros, so the Mij names  */
        /* used inside SetMDS become array lookups (Mij itself is defined       */
        /* outside this file -- presumably in TABLE.H).                         */
327     #undef Mul_1 /* change what the pre-processor does with Mij */
328     #undef Mul_X
329     #undef Mul_Y
330     #define Mul_1 m1 /* It will now access m01[], m5B[], and mEF[] */
331     #define Mul_X mX
332     #define Mul_Y mY
333    
        /* SetMDS(N): build the four bytes of d from column N and store it as MDStab[N][i]. */
334     #define SetMDS(N) \
335     b0(d) = M0##N[P_##N##0]; \
336     b1(d) = M1##N[P_##N##0]; \
337     b2(d) = M2##N[P_##N##0]; \
338     b3(d) = M3##N[P_##N##0]; \
339     MDStab[N][i] = d;
340    
341     SetMDS(0); /* fill in the matrix with elements computed above */
342     SetMDS(1);
343     SetMDS(2);
344     SetMDS(3);
345     }
346     #undef Mul_1
347     #undef Mul_X
348     #undef Mul_Y
349     #define Mul_1 Mx_1 /* re-enable true multiply */
350     #define Mul_X Mx_X
351     #define Mul_Y Mx_Y
352    
353     #if BIG_TAB
        /* Precompute bigTab[i][j][k] = q0[q1[k]^j], where q0/q1 are the last two  */
        /* 8-bit permutation stages of S-box i and j is a key byte.                */
354     {
355     int j,k;
356     BYTE *q0,*q1;
357    
358     for (i=0;i<4;i++)
359     {
360     switch (i)
361     {
362     case 0: q0=p8(01); q1=p8(02); break;
363     case 1: q0=p8(11); q1=p8(12); break;
364     case 2: q0=p8(21); q1=p8(22); break;
365     case 3: q0=p8(31); q1=p8(32); break;
366     }
367     for (j=0;j<256;j++)
368     for (k=0;k<256;k++)
369     bigTab[i][j][k]=q0[q1[k]^j];
370     }
371     }
372     #endif
373    
374     needToBuildMDS=0; /* NEVER modify the table again! */
375     }
376    
377     /*
378     +*****************************************************************************
379     *
380     * Function Name: ReverseRoundSubkeys
381     *
382     * Function: Reverse order of round subkeys to switch between encrypt/decrypt
383     *
384     * Arguments: key = ptr to keyInstance to be reversed
385     * newDir = new direction value
386     *
387     * Return: None.
388     *
389     * Notes:
390     * This optimization allows both blockEncrypt and blockDecrypt to use the same
391     * "fallthru" switch statement based on the number of rounds.
392     * Note that key->numRounds must be even and >= 2 here.
393     *
394     -****************************************************************************/
395     void ReverseRoundSubkeys(keyInstance *key,BYTE newDir)
396     {
397     DWORD t0,t1;
398     register DWORD *r0=key->subKeys+ROUND_SUBKEYS;
399     register DWORD *r1=r0 + 2*key->numRounds - 2;
400    
401     for (;r0 < r1;r0+=2,r1-=2)
402     {
403     t0=r0[0]; /* swap the order */
404     t1=r0[1];
405     r0[0]=r1[0]; /* but keep relative order within pairs */
406     r0[1]=r1[1];
407     r1[0]=t0;
408     r1[1]=t1;
409     }
410    
411     key->direction=newDir;
412     }
413    
414     /*
415     +*****************************************************************************
416     *
417     * Function Name: Xor256
418     *
419     * Function: Copy an 8-bit permutation (256 bytes), xoring with a byte
420     *
421     * Arguments: dst = where to put result
422     * src = where to get data (can be same asa dst)
423     * b = byte to xor
424     *
425     * Return: None
426     *
427     * Notes:
428     * BorlandC's optimization is terrible! When we put the code inline,
429     * it generates fairly good code in the *following* segment (not in the Xor256
430     * code itself). If the call is made, the code following the call is awful!
431     * The penalty is nearly 50%! So we take the code size hit for inlining for
432     * Borland, while Microsoft happily works with a call.
433     *
434     -****************************************************************************/
435     #if defined(__BORLANDC__) /* do it inline */
436     #define Xor32(dst,src,i) { ((DWORD *)dst)[i] = ((DWORD *)src)[i] ^ tmpX; }
437     #define Xor256(dst,src,b) \
438     { \
439     register DWORD tmpX=0x01010101u * b;\
440     for (i=0;i<64;i+=4) \
441     { Xor32(dst,src,i ); Xor32(dst,src,i+1); Xor32(dst,src,i+2); Xor32(dst,src,i+3); } \
442     }
443     #else /* do it as a function call */
444     void Xor256(void *dst,void *src,BYTE b)
445     {
446     register DWORD x=b*0x01010101u; /* replicate byte to all four bytes */
447     register DWORD *d=(DWORD *)dst;
448     register DWORD *s=(DWORD *)src;
449     #define X_8(N) { d[N]=s[N] ^ x; d[N+1]=s[N+1] ^ x; }
450     #define X_32(N) { X_8(N); X_8(N+2); X_8(N+4); X_8(N+6); }
451     X_32(0 ); X_32( 8); X_32(16); X_32(24); /* all inline */
452     d+=32; /* keep offsets small! */
453     s+=32;
454     X_32(0 ); X_32( 8); X_32(16); X_32(24); /* all inline */
455     }
456     #endif
457    
458     /*
459     +*****************************************************************************
460     *
461     * Function Name: reKey
462     *
463     * Function: Initialize the Twofish key schedule from key32
464     *
465     * Arguments: key = ptr to keyInstance to be initialized
466     *
467     * Return: TRUE on success
468     *
469     * Notes:
470     * Here we precompute all the round subkeys, although that is not actually
471     * required. For example, on a smartcard, the round subkeys can
472     * be generated on-the-fly using f32()
473     *
474     -****************************************************************************/
475     int reKey(keyInstance *key)
476     {
477     int i,j,k64Cnt,keyLen;
478     int subkeyCnt;
479     DWORD A=0,B=0,q;
480     DWORD sKey[MAX_KEY_BITS/64],k32e[MAX_KEY_BITS/64],k32o[MAX_KEY_BITS/64]; /* S-box key dwords; even and odd key dwords */
481     BYTE L0[256],L1[256]; /* small local 8-bit permutations */
482    
        /* Note: the keyLen check below is nested inside ALIGN32, so it is only   */
        /* compiled when both VALIDATE_PARMS and ALIGN32 are nonzero.             */
483     #if VALIDATE_PARMS
484     #if ALIGN32
485     if (((int)key) & 3)
486     return BAD_ALIGN32;
487     if ((key->keyLen % 64) || (key->keyLen < MIN_KEY_BITS))
488     return BAD_KEY_INSTANCE;
489     #endif
490     #endif
491    
492     if (needToBuildMDS) /* do this one time only */
493     BuildMDS();
494    
        /* F32(res,x,k32): inline keyed S-box + MDStab lookup of x under key      */
        /* dwords k32[], stored into res.  Dispatches on k64Cnt & 3 (so 4 -> 0).  */
        /* There is no case for 1 and no default: with k64Cnt == 1 res is left    */
        /* unchanged.                                                             */
495     #define F32(res,x,k32) \
496     { \
497     DWORD t=x; \
498     switch (k64Cnt & 3) \
499     { \
500     case 0: /* same as 4 */ \
501     b0(t) = p8(04)[b0(t)] ^ b0(k32[3]); \
502     b1(t) = p8(14)[b1(t)] ^ b1(k32[3]); \
503     b2(t) = p8(24)[b2(t)] ^ b2(k32[3]); \
504     b3(t) = p8(34)[b3(t)] ^ b3(k32[3]); \
505     /* fall thru, having pre-processed t */ \
506     case 3: b0(t) = p8(03)[b0(t)] ^ b0(k32[2]); \
507     b1(t) = p8(13)[b1(t)] ^ b1(k32[2]); \
508     b2(t) = p8(23)[b2(t)] ^ b2(k32[2]); \
509     b3(t) = p8(33)[b3(t)] ^ b3(k32[2]); \
510     /* fall thru, having pre-processed t */ \
511     case 2: /* 128-bit keys (optimize for this case) */ \
512     res= MDStab[0][p8(01)[p8(02)[b0(t)] ^ b0(k32[1])] ^ b0(k32[0])] ^ \
513     MDStab[1][p8(11)[p8(12)[b1(t)] ^ b1(k32[1])] ^ b1(k32[0])] ^ \
514     MDStab[2][p8(21)[p8(22)[b2(t)] ^ b2(k32[1])] ^ b2(k32[0])] ^ \
515     MDStab[3][p8(31)[p8(32)[b3(t)] ^ b3(k32[1])] ^ b3(k32[0])] ; \
516     } \
517     }
518    
519    
        /* The `if` below guards the following brace block only when USE_ASM is   */
        /* defined and CHECK_TABLE is 0; otherwise the block always runs.         */
520     #if !CHECK_TABLE
521     #if defined(USE_ASM) /* only do this if not using assembler */
522     if (!(useAsm & 4))
523     #endif
524     #endif
525     {
526     subkeyCnt = ROUND_SUBKEYS + 2*key->numRounds;
527     keyLen=key->keyLen;
528     k64Cnt=(keyLen+63)/64; /* number of 64-bit key words */
529     for (i=0,j=k64Cnt-1;i<k64Cnt;i++,j--)
530     { /* split into even/odd key dwords */
531     k32e[i]=key->key32[2*i ];
532     k32o[i]=key->key32[2*i+1];
533     /* compute S-box keys using (12,8) Reed-Solomon code over GF(256) */
534     sKey[j]=key->sboxKeys[j]=RS_MDS_Encode(k32e[i],k32o[i]); /* reverse order */
535     }
536     }
537    
538     #ifdef USE_ASM
539     if (useAsm & 4)
540     {
541     #if defined(COMPILE_KEY) && defined(USE_ASM)
542     key->keySig = VALID_SIG; /* show that we are initialized */
543     key->codeSize = sizeof(key->compiledCode); /* set size */
544     #endif
545     reKey_86(key); /* return value is not checked */
546     }
547     else
548     #endif
549     {
        /* One loop pass produces one subkey pair; q advances by SK_STEP per pair. */
550     for (i=q=0;i<subkeyCnt/2;i++,q+=SK_STEP)
551     { /* compute round subkeys for PHT */
552     F32(A,q ,k32e); /* A uses even key dwords */
553     F32(B,q+SK_BUMP,k32o); /* B uses odd key dwords */
554     B = ROL(B,8);
555     key->subKeys[2*i ] = A+B; /* combine with a PHT */
556     B = A + 2*B;
557     key->subKeys[2*i+1] = ROL(B,SK_ROTL);
558     }
559     #if !defined(ZERO_KEY)
        /* Fill the key-dependent S-box table through sbSet()/Xor256().  The      */
        /* sbNNN/oneNNN helper macros are defined in place for the selected       */
        /* keying mode.  The switch has no default: any other keyLen leaves the   */
        /* S-box table as it was.                                                 */
560     switch (keyLen) /* case out key length for speed in generating S-boxes */
561     {
562     case 128:
563     #if defined(FULL_KEY) || defined(PART_KEY)
564     #if BIG_TAB
565     #define one128(N,J) sbSet(N,i,J,L0[i+J])
566     #define sb128(N) { \
567     BYTE *qq=bigTab[N][b##N(sKey[1])]; \
568     Xor256(L0,qq,b##N(sKey[0])); \
569     for (i=0;i<256;i+=2) { one128(N,0); one128(N,1); } }
570     #else
571     #define one128(N,J) sbSet(N,i,J,p8(N##1)[L0[i+J]]^k0)
572     #define sb128(N) { \
573     Xor256(L0,p8(N##2),b##N(sKey[1])); \
574     { register DWORD k0=b##N(sKey[0]); \
575     for (i=0;i<256;i+=2) { one128(N,0); one128(N,1); } } }
576     #endif
577     #elif defined(MIN_KEY)
578     #define sb128(N) Xor256(_sBox8_(N),p8(N##2),b##N(sKey[1]))
579     #endif
580     sb128(0); sb128(1); sb128(2); sb128(3);
581     break;
582     case 192:
583     #if defined(FULL_KEY) || defined(PART_KEY)
584     #define one192(N,J) sbSet(N,i,J,p8(N##1)[p8(N##2)[L0[i+J]]^k1]^k0)
585     #define sb192(N) { \
586     Xor256(L0,p8(N##3),b##N(sKey[2])); \
587     { register DWORD k0=b##N(sKey[0]); \
588     register DWORD k1=b##N(sKey[1]); \
589     for (i=0;i<256;i+=2) { one192(N,0); one192(N,1); } } }
590     #elif defined(MIN_KEY)
591     #define one192(N,J) sbSet(N,i,J,p8(N##2)[L0[i+J]]^k1)
592     #define sb192(N) { \
593     Xor256(L0,p8(N##3),b##N(sKey[2])); \
594     { register DWORD k1=b##N(sKey[1]); \
595     for (i=0;i<256;i+=2) { one192(N,0); one192(N,1); } } }
596     #endif
597     sb192(0); sb192(1); sb192(2); sb192(3);
598     break;
599     case 256:
600     #if defined(FULL_KEY) || defined(PART_KEY)
601     #define one256(N,J) sbSet(N,i,J,p8(N##1)[p8(N##2)[L0[i+J]]^k1]^k0)
602     #define sb256(N) { \
603     Xor256(L1,p8(N##4),b##N(sKey[3])); \
604     for (i=0;i<256;i+=2) {L0[i ]=p8(N##3)[L1[i]]; \
605     L0[i+1]=p8(N##3)[L1[i+1]]; } \
606     Xor256(L0,L0,b##N(sKey[2])); \
607     { register DWORD k0=b##N(sKey[0]); \
608     register DWORD k1=b##N(sKey[1]); \
609     for (i=0;i<256;i+=2) { one256(N,0); one256(N,1); } } }
610     #elif defined(MIN_KEY)
611     #define one256(N,J) sbSet(N,i,J,p8(N##2)[L0[i+J]]^k1)
612     #define sb256(N) { \
613     Xor256(L1,p8(N##4),b##N(sKey[3])); \
614     for (i=0;i<256;i+=2) {L0[i ]=p8(N##3)[L1[i]]; \
615     L0[i+1]=p8(N##3)[L1[i+1]]; } \
616     Xor256(L0,L0,b##N(sKey[2])); \
617     { register DWORD k1=b##N(sKey[1]); \
618     for (i=0;i<256;i+=2) { one256(N,0); one256(N,1); } } }
619     #endif
620     sb256(0); sb256(1); sb256(2); sb256(3);
621     break;
622     }
623     #endif
624     }
625    
        /* Self-check against f32().  It relies on assert(), but the              */
        /* #include <assert.h> near the top of this file is commented out, so it  */
        /* has to be restored before CHECK_TABLE is switched on.                  */
626     #if CHECK_TABLE /* sanity check vs. pedagogical code*/
627     {
628     GetSboxKey;
629     for (i=0;i<subkeyCnt/2;i++)
630     {
631     A = f32(i*SK_STEP ,k32e,keyLen); /* A uses even key dwords */
632     B = f32(i*SK_STEP+SK_BUMP,k32o,keyLen); /* B uses odd key dwords */
633     B = ROL(B,8);
634     assert(key->subKeys[2*i ] == A+ B);
635     assert(key->subKeys[2*i+1] == ROL(A+2*B,SK_ROTL));
636     }
637     #if !defined(ZERO_KEY) /* any S-boxes to check? */
638     for (i=q=0;i<256;i++,q+=0x01010101)
639     assert(f32(q,key->sboxKeys,keyLen) == Fe32_(q,0));
640     #endif
641     }
642     #endif /* CHECK_TABLE */
643    
        /* The schedule built above is reversed for encryption keys; the          */
        /* direction field itself is left as DIR_ENCRYPT.                         */
644     if (key->direction == DIR_ENCRYPT)
645     ReverseRoundSubkeys(key,DIR_ENCRYPT); /* reverse the round subkey order */
646    
647     return TRUE;
648     }
649     /*
650     +*****************************************************************************
651     *
652     * Function Name: makeKey
653     *
654     * Function: Initialize the Twofish key schedule
655     *
656     * Arguments: key = ptr to keyInstance to be initialized
657     * direction = DIR_ENCRYPT or DIR_DECRYPT
658     * keyLen = # bits of key text at *keyMaterial
659     * keyMaterial = ptr to hex ASCII chars representing key bits
660     *
661     * Return: TRUE on success
662     * else error code (e.g., BAD_KEY_DIR)
663     *
664     * Notes: This parses the key bits from keyMaterial. Zeroes out unused key bits
665     *
666     -****************************************************************************/
667     int makeKey(keyInstance *key, BYTE direction, int keyLen,CONST char *keyMaterial)
668     {
669     int i;
670    
671     #if VALIDATE_PARMS /* first, sanity check on parameters */
672     if (key == NULL)
673     return BAD_KEY_INSTANCE;/* must have a keyInstance to initialize */
674     if ((direction != DIR_ENCRYPT) && (direction != DIR_DECRYPT))
675     return BAD_KEY_DIR; /* must have valid direction */
676     if ((keyLen > MAX_KEY_BITS) || (keyLen < 8) || (keyLen & 0x3F))
677     return BAD_KEY_MAT; /* length must be valid */
678     key->keySig = VALID_SIG; /* show that we are initialized */
679     #if ALIGN32
680     if ((((int)key) & 3) || (((int)key->key32) & 3))
681     return BAD_ALIGN32;
682     #endif
683     #endif
684    
685     key->direction = direction;/* set our cipher direction */
686     key->keyLen = (keyLen+63) & ~63; /* round up to multiple of 64 */
687     key->numRounds = numRounds[(keyLen-1)/64];
688     memset(key->key32,0,sizeof(key->key32)); /* zero unused bits */
689    
690     if (keyMaterial == NULL)
691     return TRUE; /* allow a "dummy" call */
692    
693     for (i=0;i<keyLen/32;i++) /* make byte-oriented copy for CFB1 */
694     key->key32[i] = (((unsigned char *)keyMaterial)[i*4+0] << 0)
695     | (((unsigned char *)keyMaterial)[i*4+1] << 8)
696     | (((unsigned char *)keyMaterial)[i*4+2] << 16)
697     | (((unsigned char *)keyMaterial)[i*4+3] << 24);
698    
699     return reKey(key); /* generate round subkeys */
700     }
701    
702    
703     /*
704     +*****************************************************************************
705     *
706     * Function Name: cipherInit
707     *
708     * Function: Initialize the Twofish cipher in a given mode
709     *
710     * Arguments: cipher = ptr to cipherInstance to be initialized
711     * mode = MODE_ECB, MODE_CBC, or MODE_CFB1
712     * IV = ptr to hex ASCII test representing IV bytes
713     *
714     * Return: TRUE on success
715     * else error code (e.g., BAD_CIPHER_MODE)
716     *
717     -****************************************************************************/
718     int cipherInit(cipherInstance *cipher, BYTE mode,CONST char *IV)
719     {
720     int i;
721     #if VALIDATE_PARMS /* first, sanity check on parameters */
722     if (cipher == NULL)
723     return BAD_PARAMS; /* must have a cipherInstance to initialize */
724     if ((mode != MODE_ECB) && (mode != MODE_CBC) && (mode != MODE_CFB1))
725     return BAD_CIPHER_MODE; /* must have valid cipher mode */
726     cipher->cipherSig = VALID_SIG;
727     #if ALIGN32
728     if ((((int)cipher) & 3) || (((int)cipher->IV) & 3) || (((int)cipher->iv32) & 3))
729     return BAD_ALIGN32;
730     #endif
731     #endif
732    
733     if ((mode != MODE_ECB) && (IV)) /* parse the IV */
734     {
735     memcpy (cipher->iv32, IV, BLOCK_SIZE/32);
736     for (i=0;i<BLOCK_SIZE/32;i++) /* make byte-oriented copy for CFB1 */
737     ((DWORD *)cipher->IV)[i] = Bswap(cipher->iv32[i]);
738     }
739    
740     cipher->mode = mode;
741    
742     return TRUE;
743     }
744    
745     /*
746     +*****************************************************************************
747     *
748     * Function Name: blockEncrypt
749     *
750     * Function: Encrypt block(s) of data using Twofish
751     *
752     * Arguments: cipher = ptr to already initialized cipherInstance
753     * key = ptr to already initialized keyInstance
754     * input = ptr to data blocks to be encrypted
755     * inputLen = # bits to encrypt (multiple of blockSize)
756     * outBuffer = ptr to where to put encrypted blocks
757     *
758     * Return: # bits ciphered (>= 0)
759     * else error code (e.g., BAD_CIPHER_STATE, BAD_KEY_MATERIAL)
760     *
761     * Notes: The only supported block size for ECB/CBC modes is BLOCK_SIZE bits.
762     * If inputLen is not a multiple of BLOCK_SIZE bits in those modes,
763     * an error BAD_INPUT_LEN is returned. In CFB1 mode, all block
764     * sizes can be supported.
765     *
766     -****************************************************************************/
767     int blockEncrypt(cipherInstance *cipher, keyInstance *key,CONST BYTE *input,
768     int inputLen, BYTE *outBuffer)
769     {
770     int i,n; /* loop counters */
771     DWORD x[BLOCK_SIZE/32]; /* block being encrypted */
772     DWORD t0,t1; /* temp variables */
773     int rounds=key->numRounds; /* number of rounds */
774     BYTE bit,bit0,ctBit,carry; /* temps for CFB */
775    
776     /* make local copies of things for faster access */
777     int mode = cipher->mode;
778     DWORD sk[TOTAL_SUBKEYS];
779     DWORD IV[BLOCK_SIZE/32];
780    
781     GetSboxKey;
782    
783     #if VALIDATE_PARMS
784     if ((cipher == NULL) || (cipher->cipherSig != VALID_SIG))
785     return BAD_CIPHER_STATE;
786     if ((key == NULL) || (key->keySig != VALID_SIG))
787     return BAD_KEY_INSTANCE;
788     if ((rounds < 2) || (rounds > MAX_ROUNDS) || (rounds&1))
789     return BAD_KEY_INSTANCE;
790     if ((mode != MODE_CFB1) && (inputLen % BLOCK_SIZE))
791     return BAD_INPUT_LEN;
792     #if ALIGN32
793     if ( (((int)cipher) & 3) || (((int)key ) & 3) ||
794     (((int)input ) & 3) || (((int)outBuffer) & 3))
795     return BAD_ALIGN32;
796     #endif
797     #endif
798    
799     if (mode == MODE_CFB1)
800     { /* use recursion here to handle CFB, one block at a time */
801     cipher->mode = MODE_ECB; /* do encryption in ECB */
802     for (n=0;n<inputLen;n++)
803     {
804     blockEncrypt(cipher,key,cipher->IV,BLOCK_SIZE,(BYTE *)x);
805     bit0 = 0x80 >> (n & 7);/* which bit position in byte */
806     ctBit = (input[n/8] & bit0) ^ ((((BYTE *) x)[0] & 0x80) >> (n&7));
807     outBuffer[n/8] = (outBuffer[n/8] & ~ bit0) | ctBit;
808     carry = ctBit >> (7 - (n&7));
809     for (i=BLOCK_SIZE/8-1;i>=0;i--)
810     {
811     bit = cipher->IV[i] >> 7; /* save next "carry" from shift */
812     cipher->IV[i] = (cipher->IV[i] << 1) ^ carry;
813     carry = bit;
814     }
815     }
816     cipher->mode = MODE_CFB1; /* restore mode for next time */
817     return inputLen;
818     }
819    
820     /* here for ECB, CBC modes */
821     if (key->direction != DIR_ENCRYPT)
822     ReverseRoundSubkeys(key,DIR_ENCRYPT); /* reverse the round subkey order */
823    
824     #ifdef USE_ASM
825     if ((useAsm & 1) && (inputLen))
826     #ifdef COMPILE_KEY
827     if (key->keySig == VALID_SIG)
828     return ((CipherProc *)(key->encryptFuncPtr))(cipher,key,input,inputLen,outBuffer);
829     #else
830     return (*blockEncrypt_86)(cipher,key,input,inputLen,outBuffer);
831     #endif
832     #endif
833     /* make local copy of subkeys for speed */
834     memcpy(sk,key->subKeys,sizeof(DWORD)*(ROUND_SUBKEYS+2*rounds));
835     if (mode == MODE_CBC)
836     BlockCopy(IV,cipher->iv32)
837     else
838     IV[0]=IV[1]=IV[2]=IV[3]=0;
839    
840     for (n=0;n<inputLen;n+=BLOCK_SIZE,input+=BLOCK_SIZE/8,outBuffer+=BLOCK_SIZE/8)
841     {
842     #define LoadBlockE(N) x[N]=Bswap(((DWORD *)input)[N]) ^ sk[INPUT_WHITEN+N] ^ IV[N]
843     LoadBlockE(0); LoadBlockE(1); LoadBlockE(2); LoadBlockE(3);
844     #define EncryptRound(K,R,id) \
845     t0 = Fe32##id(x[K ],0); \
846     t1 = Fe32##id(x[K^1],3); \
847     x[K^3] = ROL(x[K^3],1); \
848     x[K^2]^= t0 + t1 + sk[ROUND_SUBKEYS+2*(R) ]; \
849     x[K^3]^= t0 + 2*t1 + sk[ROUND_SUBKEYS+2*(R)+1]; \
850     x[K^2] = ROR(x[K^2],1);
851     #define Encrypt2(R,id) { EncryptRound(0,R+1,id); EncryptRound(2,R,id); }
852    
853     #if defined(ZERO_KEY)
854     switch (key->keyLen)
855     {
856     case 128:
857     for (i=rounds-2;i>=0;i-=2)
858     Encrypt2(i,_128);
859     break;
860     case 192:
861     for (i=rounds-2;i>=0;i-=2)
862     Encrypt2(i,_192);
863     break;
864     case 256:
865     for (i=rounds-2;i>=0;i-=2)
866     Encrypt2(i,_256);
867     break;
868     }
869     #else
870     Encrypt2(14,_);
871     Encrypt2(12,_);
872     Encrypt2(10,_);
873     Encrypt2( 8,_);
874     Encrypt2( 6,_);
875     Encrypt2( 4,_);
876     Encrypt2( 2,_);
877     Encrypt2( 0,_);
878     #endif
879    
880     /* need to do (or undo, depending on your point of view) final swap */
881     #if LittleEndian
882     #define StoreBlockE(N) ((DWORD *)outBuffer)[N]=x[N^2] ^ sk[OUTPUT_WHITEN+N]
883     #else
884     #define StoreBlockE(N) { t0=x[N^2] ^ sk[OUTPUT_WHITEN+N]; ((DWORD *)outBuffer)[N]=Bswap(t0); }
885     #endif
886     StoreBlockE(0); StoreBlockE(1); StoreBlockE(2); StoreBlockE(3);
887     if (mode == MODE_CBC)
888     {
889     IV[0]=Bswap(((DWORD *)outBuffer)[0]);
890     IV[1]=Bswap(((DWORD *)outBuffer)[1]);
891     IV[2]=Bswap(((DWORD *)outBuffer)[2]);
892     IV[3]=Bswap(((DWORD *)outBuffer)[3]);
893     }
894     }
895    
896     if (mode == MODE_CBC)
897     BlockCopy(cipher->iv32,IV);
898    
899     return inputLen;
900     }
901    
902     /*
903     +*****************************************************************************
904     *
905     * Function Name: blockDecrypt
906     *
907     * Function: Decrypt block(s) of data using Twofish
908     *
909     * Arguments: cipher = ptr to already initialized cipherInstance
910     * key = ptr to already initialized keyInstance
911     * input = ptr to data blocks to be decrypted
912     * inputLen = # bits to encrypt (multiple of blockSize)
913     * outBuffer = ptr to where to put decrypted blocks
914     *
915     * Return: # bits ciphered (>= 0)
916     * else error code (e.g., BAD_CIPHER_STATE, BAD_KEY_MATERIAL)
917     *
918     * Notes: The only supported block size for ECB/CBC modes is BLOCK_SIZE bits.
919     * If inputLen is not a multiple of BLOCK_SIZE bits in those modes,
920     * an error BAD_INPUT_LEN is returned. In CFB1 mode, all block
921     * sizes can be supported.
922     *
923     -****************************************************************************/
924     int blockDecrypt(cipherInstance *cipher, keyInstance *key,CONST BYTE *input,
925     int inputLen, BYTE *outBuffer)
926     {
927     int i,n; /* loop counters */
928     DWORD x[BLOCK_SIZE/32]; /* block being encrypted */
929     DWORD t0,t1; /* temp variables */
930     int rounds=key->numRounds; /* number of rounds */
931     BYTE bit,bit0,ctBit,carry; /* temps for CFB */
932    
933     /* make local copies of things for faster access */
934     int mode = cipher->mode;
935     DWORD sk[TOTAL_SUBKEYS];
936     DWORD IV[BLOCK_SIZE/32];
937    
938     GetSboxKey;
939    
940     #if VALIDATE_PARMS
941     if ((cipher == NULL) || (cipher->cipherSig != VALID_SIG))
942     return BAD_CIPHER_STATE;
943     if ((key == NULL) || (key->keySig != VALID_SIG))
944     return BAD_KEY_INSTANCE;
945     if ((rounds < 2) || (rounds > MAX_ROUNDS) || (rounds&1))
946     return BAD_KEY_INSTANCE;
947     if ((cipher->mode != MODE_CFB1) && (inputLen % BLOCK_SIZE))
948     return BAD_INPUT_LEN;
949     #if ALIGN32
950     if ( (((int)cipher) & 3) || (((int)key ) & 3) ||
951     (((int)input) & 3) || (((int)outBuffer) & 3))
952     return BAD_ALIGN32;
953     #endif
954     #endif
955    
956     if (cipher->mode == MODE_CFB1)
957     { /* use blockEncrypt here to handle CFB, one block at a time */
958     cipher->mode = MODE_ECB; /* do encryption in ECB */
959     for (n=0;n<inputLen;n++)
960     {
961     blockEncrypt(cipher,key,cipher->IV,BLOCK_SIZE,(BYTE *)x);
962     bit0 = 0x80 >> (n & 7);
963     ctBit = input[n/8] & bit0;
964     outBuffer[n/8] = (outBuffer[n/8] & ~ bit0) |
965     (ctBit ^ ((((BYTE *) x)[0] & 0x80) >> (n&7)));
966     carry = ctBit >> (7 - (n&7));
967     for (i=BLOCK_SIZE/8-1;i>=0;i--)
968     {
969     bit = cipher->IV[i] >> 7; /* save next "carry" from shift */
970     cipher->IV[i] = (cipher->IV[i] << 1) ^ carry;
971     carry = bit;
972     }
973     }
974     cipher->mode = MODE_CFB1; /* restore mode for next time */
975     return inputLen;
976     }
977    
978     /* here for ECB, CBC modes */
979     if (key->direction != DIR_DECRYPT)
980     ReverseRoundSubkeys(key,DIR_DECRYPT); /* reverse the round subkey order */
981     #ifdef USE_ASM
982     if ((useAsm & 2) && (inputLen))
983     #ifdef COMPILE_KEY
984     if (key->keySig == VALID_SIG)
985     return ((CipherProc *)(key->decryptFuncPtr))(cipher,key,input,inputLen,outBuffer);
986     #else
987     return (*blockDecrypt_86)(cipher,key,input,inputLen,outBuffer);
988     #endif
989     #endif
990     /* make local copy of subkeys for speed */
991     memcpy(sk,key->subKeys,sizeof(DWORD)*(ROUND_SUBKEYS+2*rounds));
992     if (mode == MODE_CBC)
993     BlockCopy(IV,cipher->iv32)
994     else
995     IV[0]=IV[1]=IV[2]=IV[3]=0;
996    
997     for (n=0;n<inputLen;n+=BLOCK_SIZE,input+=BLOCK_SIZE/8,outBuffer+=BLOCK_SIZE/8)
998     {
999     #define LoadBlockD(N) x[N^2]=Bswap(((DWORD *)input)[N]) ^ sk[OUTPUT_WHITEN+N]
1000     LoadBlockD(0); LoadBlockD(1); LoadBlockD(2); LoadBlockD(3);
1001    
1002     #define DecryptRound(K,R,id) \
1003     t0 = Fe32##id(x[K ],0); \
1004     t1 = Fe32##id(x[K^1],3); \
1005     x[K^2] = ROL (x[K^2],1); \
1006     x[K^2]^= t0 + t1 + sk[ROUND_SUBKEYS+2*(R) ]; \
1007     x[K^3]^= t0 + 2*t1 + sk[ROUND_SUBKEYS+2*(R)+1]; \
1008     x[K^3] = ROR (x[K^3],1);
1009    
1010     #define Decrypt2(R,id) { DecryptRound(2,R+1,id); DecryptRound(0,R,id); }
1011    
1012     #if defined(ZERO_KEY)
1013     switch (key->keyLen)
1014     {
1015     case 128:
1016     for (i=rounds-2;i>=0;i-=2)
1017     Decrypt2(i,_128);
1018     break;
1019     case 192:
1020     for (i=rounds-2;i>=0;i-=2)
1021     Decrypt2(i,_192);
1022     break;
1023     case 256:
1024     for (i=rounds-2;i>=0;i-=2)
1025     Decrypt2(i,_256);
1026     break;
1027     }
1028     #else
1029     {
1030     Decrypt2(14,_);
1031     Decrypt2(12,_);
1032     Decrypt2(10,_);
1033     Decrypt2( 8,_);
1034     Decrypt2( 6,_);
1035     Decrypt2( 4,_);
1036     Decrypt2( 2,_);
1037     Decrypt2( 0,_);
1038     }
1039     #endif
1040     if (cipher->mode == MODE_ECB)
1041     {
1042     #if LittleEndian
1043     #define StoreBlockD(N) ((DWORD *)outBuffer)[N] = x[N] ^ sk[INPUT_WHITEN+N]
1044     #else
1045     #define StoreBlockD(N) { t0=x[N]^sk[INPUT_WHITEN+N]; ((DWORD *)outBuffer)[N] = Bswap(t0); }
1046     #endif
1047     StoreBlockD(0); StoreBlockD(1); StoreBlockD(2); StoreBlockD(3);
1048     #undef StoreBlockD
1049     continue;
1050     }
1051     else
1052     {
1053     #define StoreBlockD(N) x[N] ^= sk[INPUT_WHITEN+N] ^ IV[N]; \
1054     IV[N] = Bswap(((DWORD *)input)[N]); \
1055     ((DWORD *)outBuffer)[N] = Bswap(x[N]);
1056     StoreBlockD(0); StoreBlockD(1); StoreBlockD(2); StoreBlockD(3);
1057     #undef StoreBlockD
1058     }
1059     }
1060     if (mode == MODE_CBC) /* restore iv32 to cipher */
1061     BlockCopy(cipher->iv32,IV)
1062    
1063     return inputLen;
1064     }
1065    
#ifdef GetCodeSize
/*
+*****************************************************************************
*
* Function Name:    TwofishCodeSize
*
* Function:         Report the size of the Twofish code
*
* Arguments:        None
*
* Return:           TwofishAsmCodeSize() when built with USE_ASM and either
*                   of the low two bits of useAsm is set; otherwise the
*                   difference between Here(0), sampled on entry, and
*                   TwofishCodeStart().
*
* Notes:            NOTE(review): Here() and TwofishCodeStart() are defined
*                   outside this excerpt; presumably they yield code
*                   addresses bracketing this module -- confirm.
*
-****************************************************************************/
DWORD TwofishCodeSize(void)
{
    DWORD x = Here(0);
#ifdef USE_ASM
    if (useAsm & 3)
        return TwofishAsmCodeSize();
#endif
    return x - TwofishCodeStart();
}
#endif