ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Crypt-Twofish2/twofish.c
Revision: 1.4
Committed: Sun Aug 1 18:09:47 2021 UTC (3 years, 2 months ago) by root
Content type: text/plain
Branch: MAIN
CVS Tags: rel-1_03, HEAD
Changes since 1.3: +2 -1 lines
Log Message:
1.03

File Contents

# Content
1 /***************************************************************************
2 TWOFISH2.C -- Optimized C API calls for TWOFISH AES submission
3
4 Submitters:
5 Bruce Schneier, Counterpane Systems
6 Doug Whiting, Hi/fn
7 John Kelsey, Counterpane Systems
8 Chris Hall, Counterpane Systems
9 David Wagner, UC Berkeley
10
11 Code Author: Doug Whiting, Hi/fn
12
13 Version 1.00 April 1998
14
15 Copyright 1998, Hi/fn and Counterpane Systems. All rights reserved.
16
17 Notes:
18 * Optimized version
19 * Tab size is set to 4 characters in this file
20
21 ***************************************************************************/
22 #include "aes.h"
23 #include "table.h"
24
25 #include <memory.h>
26 /*#include <assert.h>*/
27
28 #if defined(min_key) && !defined(MIN_KEY)
29 #define MIN_KEY 1 /* toupper() */
30 #elif defined(part_key) && !defined(PART_KEY)
31 #define PART_KEY 1
32 #elif defined(zero_key) && !defined(ZERO_KEY)
33 #define ZERO_KEY 1
34 #endif
35
36
37 #ifdef USE_ASM
38 extern int useAsm; /* ok to use ASM code? */
39
40 typedef int cdecl CipherProc
41 (cipherInstance *cipher, keyInstance *key,BYTE *input,int inputLen,BYTE *outBuffer);
42 typedef int cdecl KeySetupProc(keyInstance *key);
43
44 extern CipherProc *blockEncrypt_86; /* ptr to ASM functions */
45 extern CipherProc *blockDecrypt_86;
46 extern KeySetupProc *reKey_86;
47 extern DWORD cdecl TwofishAsmCodeSize(void);
48 #endif
49
50 /*
51 +*****************************************************************************
52 * Constants/Macros/Tables
53 -****************************************************************************/
54
55 #define CONST /* help syntax from C++, NOP here */
56
57 static CONST fullSbox MDStab; /* not actually const. Initialized ONE time */
58 static int needToBuildMDS=1; /* is MDStab initialized yet? */
59
60 #define BIG_TAB 0
61
62 #if BIG_TAB
63 static BYTE bigTab[4][256][256]; /* pre-computed S-box */
64 #endif
65
66 /* number of rounds for various key sizes: 128, 192, 256 */
67 /* (ignored for now in optimized code!) */
68 static CONST int numRounds[4]= {0,ROUNDS_128,ROUNDS_192,ROUNDS_256};
69
70 #if REENTRANT
71 #define _sBox_ key->sBox8x32
72 #else
73 static fullSbox _sBox_; /* permuted MDStab based on keys */
74 #endif
75 #define _sBox8_(N) (((BYTE *) _sBox_) + (N)*256)
76
77 /*------- see what level of S-box precomputation we need to do -----*/
78 #if defined(ZERO_KEY)
79 #define MOD_STRING "(Zero S-box keying)"
80 #define Fe32_128(x,R) \
81 ( MDStab[0][p8(01)[p8(02)[_b(x,R )]^b0(SKEY[1])]^b0(SKEY[0])] ^ \
82 MDStab[1][p8(11)[p8(12)[_b(x,R+1)]^b1(SKEY[1])]^b1(SKEY[0])] ^ \
83 MDStab[2][p8(21)[p8(22)[_b(x,R+2)]^b2(SKEY[1])]^b2(SKEY[0])] ^ \
84 MDStab[3][p8(31)[p8(32)[_b(x,R+3)]^b3(SKEY[1])]^b3(SKEY[0])] )
85 #define Fe32_192(x,R) \
86 ( MDStab[0][p8(01)[p8(02)[p8(03)[_b(x,R )]^b0(SKEY[2])]^b0(SKEY[1])]^b0(SKEY[0])] ^ \
87 MDStab[1][p8(11)[p8(12)[p8(13)[_b(x,R+1)]^b1(SKEY[2])]^b1(SKEY[1])]^b1(SKEY[0])] ^ \
88 MDStab[2][p8(21)[p8(22)[p8(23)[_b(x,R+2)]^b2(SKEY[2])]^b2(SKEY[1])]^b2(SKEY[0])] ^ \
89 MDStab[3][p8(31)[p8(32)[p8(33)[_b(x,R+3)]^b3(SKEY[2])]^b3(SKEY[1])]^b3(SKEY[0])] )
90 #define Fe32_256(x,R) \
91 ( MDStab[0][p8(01)[p8(02)[p8(03)[p8(04)[_b(x,R )]^b0(SKEY[3])]^b0(SKEY[2])]^b0(SKEY[1])]^b0(SKEY[0])] ^ \
92 MDStab[1][p8(11)[p8(12)[p8(13)[p8(14)[_b(x,R+1)]^b1(SKEY[3])]^b1(SKEY[2])]^b1(SKEY[1])]^b1(SKEY[0])] ^ \
93 MDStab[2][p8(21)[p8(22)[p8(23)[p8(24)[_b(x,R+2)]^b2(SKEY[3])]^b2(SKEY[2])]^b2(SKEY[1])]^b2(SKEY[0])] ^ \
94 MDStab[3][p8(31)[p8(32)[p8(33)[p8(34)[_b(x,R+3)]^b3(SKEY[3])]^b3(SKEY[2])]^b3(SKEY[1])]^b3(SKEY[0])] )
95
96 #define GetSboxKey DWORD SKEY[4]; /* local copy */ \
97 memcpy(SKEY,key->sboxKeys,sizeof(SKEY));
98 /*----------------------------------------------------------------*/
99 #elif defined(MIN_KEY)
100 #define MOD_STRING "(Minimal keying)"
101 #define Fe32_(x,R)(MDStab[0][p8(01)[_sBox8_(0)[_b(x,R )]] ^ b0(SKEY0)] ^ \
102 MDStab[1][p8(11)[_sBox8_(1)[_b(x,R+1)]] ^ b1(SKEY0)] ^ \
103 MDStab[2][p8(21)[_sBox8_(2)[_b(x,R+2)]] ^ b2(SKEY0)] ^ \
104 MDStab[3][p8(31)[_sBox8_(3)[_b(x,R+3)]] ^ b3(SKEY0)])
105 #define sbSet(N,i,J,v) { _sBox8_(N)[i+J] = v; }
106 #define GetSboxKey DWORD SKEY0 = key->sboxKeys[0] /* local copy */
107 /*----------------------------------------------------------------*/
108 #elif defined(PART_KEY)
109 #define MOD_STRING "(Partial keying)"
110 #define Fe32_(x,R)(MDStab[0][_sBox8_(0)[_b(x,R )]] ^ \
111 MDStab[1][_sBox8_(1)[_b(x,R+1)]] ^ \
112 MDStab[2][_sBox8_(2)[_b(x,R+2)]] ^ \
113 MDStab[3][_sBox8_(3)[_b(x,R+3)]])
114 #define sbSet(N,i,J,v) { _sBox8_(N)[i+J] = v; }
115 #define GetSboxKey
116 /*----------------------------------------------------------------*/
117 #else /* default is FULL_KEY */
118 #ifndef FULL_KEY
119 #define FULL_KEY 1
120 #endif
121 #if BIG_TAB
122 #define TAB_STR " (Big table)"
123 #else
124 #define TAB_STR
125 #endif
126 #ifdef COMPILE_KEY
127 #define MOD_STRING "(Compiled subkeys)" TAB_STR
128 #else
129 #define MOD_STRING "(Full keying)" TAB_STR
130 #endif
131 /* Fe32_ does a full S-box + MDS lookup. Need to #define _sBox_ before use.
132 Note that we "interleave" 0,1, and 2,3 to avoid cache bank collisions
133 in optimized assembly language.
134 */
135 #define Fe32_(x,R) (_sBox_[0][2*_b(x,R )] ^ _sBox_[0][2*_b(x,R+1)+1] ^ \
136 _sBox_[2][2*_b(x,R+2)] ^ _sBox_[2][2*_b(x,R+3)+1])
137 /* set a single S-box value, given the input byte */
138 //#define sbSet(N,i,J,v) { _sBox_[N&2][2*i+(N&1)+2*J]=MDStab[N][v]; }
139 #define sbSet(N,i,J,v) { *((DWORD *)_sBox_ + (N&2)*256 + 2*i + (N&1) + 2*J) = MDStab[N][v]; }
140 #define GetSboxKey
141 #endif
142
143 /* macro(s) for debugging help */
144 #define CHECK_TABLE 0 /* nonzero --> compare against "slow" table */
145 #define VALIDATE_PARMS 0 /* disable for full speed */
146
147 /* end of debug macros */
148
#ifdef GetCodeSize
/* Here() is not defined in the visible part of this file, so it is declared
   with external linkage only.  A declaration may carry at most one
   storage-class specifier; the former "static extern" did not compile.
   NOTE(review): presumably Here() lives in the test harness -- confirm. */
extern DWORD Here(DWORD x);     /* return caller's address! */

/* Returns Here(0) as seen from this function. */
static DWORD TwofishCodeStart(void) { return Here(0); }
#endif
153
154 /*
155 +*****************************************************************************
156 *
157 * Function Name: TableOp
158 *
159 * Function: Handle table use checking
160 *
161 * Arguments: op = what to do (see TAB_* defns in AES.H)
162 *
163 * Return: TRUE --> done (for TAB_QUERY)
164 *
165 * Notes: This routine is for use in generating the tables KAT file.
166 * For this optimized version, we don't actually track table usage,
167 * since it would make the macros incredibly ugly. Instead we just
168 * run for a fixed number of queries and then say we're done.
169 *
170 -****************************************************************************/
171 static int TableOp(int op)
172 {
173 static int queryCnt=0;
174
175 switch (op)
176 {
177 case TAB_DISABLE:
178 break;
179 case TAB_ENABLE:
180 break;
181 case TAB_RESET:
182 queryCnt=0;
183 break;
184 case TAB_QUERY:
185 queryCnt++;
186 if (queryCnt < TAB_MIN_QUERY)
187 return FALSE;
188 }
189 return TRUE;
190 }
191
192
193 #if CHECK_TABLE
194 /*
195 +*****************************************************************************
196 *
197 * Function Name: f32
198 *
199 * Function: Run four bytes through keyed S-boxes and apply MDS matrix
200 *
201 * Arguments: x = input to f function
202 * k32 = pointer to key dwords
203 * keyLen = total key length (k32 --> keyLey/2 bits)
204 *
205 * Return: The output of the keyed permutation applied to x.
206 *
207 * Notes:
208 * This function is a keyed 32-bit permutation. It is the major building
209 * block for the Twofish round function, including the four keyed 8x8
210 * permutations and the 4x4 MDS matrix multiply. This function is used
211 * both for generating round subkeys and within the round function on the
212 * block being encrypted.
213 *
214 * This version is fairly slow and pedagogical, although a smartcard would
215 * probably perform the operation exactly this way in firmware. For
216 * ultimate performance, the entire operation can be completed with four
217 * lookups into four 256x32-bit tables, with three dword xors.
218 *
219 * The MDS matrix is defined in TABLE.H. To multiply by Mij, just use the
220 * macro Mij(x).
221 *
222 -****************************************************************************/
223 static DWORD f32(DWORD x,CONST DWORD *k32,int keyLen)
224 {
225 BYTE b[4];
226
227 /* Run each byte thru 8x8 S-boxes, xoring with key byte at each stage. */
228 /* Note that each byte goes through a different combination of S-boxes.*/
229
230 *((DWORD *)b) = Bswap(x); /* make b[0] = LSB, b[3] = MSB */
231 switch (((keyLen + 63)/64) & 3)
232 {
233 case 0: /* 256 bits of key */
234 b[0] = p8(04)[b[0]] ^ b0(k32[3]);
235 b[1] = p8(14)[b[1]] ^ b1(k32[3]);
236 b[2] = p8(24)[b[2]] ^ b2(k32[3]);
237 b[3] = p8(34)[b[3]] ^ b3(k32[3]);
238 /* fall thru, having pre-processed b[0]..b[3] with k32[3] */
239 case 3: /* 192 bits of key */
240 b[0] = p8(03)[b[0]] ^ b0(k32[2]);
241 b[1] = p8(13)[b[1]] ^ b1(k32[2]);
242 b[2] = p8(23)[b[2]] ^ b2(k32[2]);
243 b[3] = p8(33)[b[3]] ^ b3(k32[2]);
244 /* fall thru, having pre-processed b[0]..b[3] with k32[2] */
245 case 2: /* 128 bits of key */
246 b[0] = p8(00)[p8(01)[p8(02)[b[0]] ^ b0(k32[1])] ^ b0(k32[0])];
247 b[1] = p8(10)[p8(11)[p8(12)[b[1]] ^ b1(k32[1])] ^ b1(k32[0])];
248 b[2] = p8(20)[p8(21)[p8(22)[b[2]] ^ b2(k32[1])] ^ b2(k32[0])];
249 b[3] = p8(30)[p8(31)[p8(32)[b[3]] ^ b3(k32[1])] ^ b3(k32[0])];
250 }
251
252 /* Now perform the MDS matrix multiply inline. */
253 return ((M00(b[0]) ^ M01(b[1]) ^ M02(b[2]) ^ M03(b[3])) ) ^
254 ((M10(b[0]) ^ M11(b[1]) ^ M12(b[2]) ^ M13(b[3])) << 8) ^
255 ((M20(b[0]) ^ M21(b[1]) ^ M22(b[2]) ^ M23(b[3])) << 16) ^
256 ((M30(b[0]) ^ M31(b[1]) ^ M32(b[2]) ^ M33(b[3])) << 24) ;
257 }
258 #endif /* CHECK_TABLE */
259
260
261 /*
262 +*****************************************************************************
263 *
264 * Function Name: RS_MDS_encode
265 *
266 * Function: Use (12,8) Reed-Solomon code over GF(256) to produce
267 * a key S-box dword from two key material dwords.
268 *
269 * Arguments: k0 = 1st dword
270 * k1 = 2nd dword
271 *
272 * Return: Remainder polynomial generated using RS code
273 *
274 * Notes:
275 * Since this computation is done only once per reKey per 64 bits of key,
276 * the performance impact of this routine is imperceptible. The RS code
277 * chosen has "simple" coefficients to allow smartcard/hardware implementation
278 * without lookup tables.
279 *
280 -****************************************************************************/
281 static DWORD RS_MDS_Encode(DWORD k0,DWORD k1)
282 {
283 int i,j;
284 DWORD r;
285
286 for (i=r=0;i<2;i++)
287 {
288 r ^= (i) ? k0 : k1; /* merge in 32 more key bits */
289 for (j=0;j<4;j++) /* shift one byte at a time */
290 RS_rem(r);
291 }
292 return r;
293 }
294
295
/*
+*****************************************************************************
*
* Function Name:    BuildMDS
*
* Function:         Initialize the MDStab array
*
* Arguments:        None.
*
* Return:           None.
*
* Notes:
*   Here we precompute all the fixed MDS table.  This only needs to be done
*   one time at initialization, after which the table is "CONST".
*   When BIG_TAB is nonzero the bigTab[] array is filled in as well.
*   Clears needToBuildMDS on exit so that reKey() does not call it again.
*
*   The function temporarily redefines Mul_1/Mul_X/Mul_Y so that the Mij
*   names used by SetMDS() index the local arrays m1[]/mX[]/mY[] instead of
*   performing a multiply; the true definitions are restored afterwards.
*   The statement order relative to the #undef/#define lines matters.
*
-****************************************************************************/
static void BuildMDS(void)
{
    int i;
    DWORD d;                    /* one MDStab entry, assembled byte by byte */
    BYTE m1[2],mX[2],mY[4];     /* products of the two permutation outputs */

    for (i=0;i<256;i++)
    {
        m1[0]=P8x8[0][i];       /* compute all the matrix elements */
        mX[0]=(BYTE) Mul_X(m1[0]);
        mY[0]=(BYTE) Mul_Y(m1[0]);

        m1[1]=P8x8[1][i];
        mX[1]=(BYTE) Mul_X(m1[1]);
        mY[1]=(BYTE) Mul_Y(m1[1]);

#undef  Mul_1                   /* change what the pre-processor does with Mij */
#undef  Mul_X
#undef  Mul_Y
#define Mul_1   m1              /* It will now access m1[], mX[], and mY[] */
#define Mul_X   mX
#define Mul_Y   mY

/* SetMDS(N): build the four bytes of d for table N from the precomputed
   products and store the result in MDStab[N][i]. */
#define SetMDS(N)   \
        b0(d) = M0##N[P_##N##0];    \
        b1(d) = M1##N[P_##N##0];    \
        b2(d) = M2##N[P_##N##0];    \
        b3(d) = M3##N[P_##N##0];    \
        MDStab[N][i] = d;

        SetMDS(0);              /* fill in the matrix with elements computed above */
        SetMDS(1);
        SetMDS(2);
        SetMDS(3);
    }
#undef  Mul_1
#undef  Mul_X
#undef  Mul_Y
#define Mul_1   Mx_1            /* re-enable true multiply */
#define Mul_X   Mx_X
#define Mul_Y   Mx_Y

#if BIG_TAB
    {
        int j,k;
        BYTE *q0,*q1;

        /* bigTab[i][j][k] = q0[q1[k]^j] for the two permutations of column i */
        for (i=0;i<4;i++)
        {
            switch (i)
            {
                case 0: q0=p8(01); q1=p8(02);   break;
                case 1: q0=p8(11); q1=p8(12);   break;
                case 2: q0=p8(21); q1=p8(22);   break;
                case 3: q0=p8(31); q1=p8(32);   break;
            }
            for (j=0;j<256;j++)
                for (k=0;k<256;k++)
                    bigTab[i][j][k]=q0[q1[k]^j];
        }
    }
#endif

    needToBuildMDS=0;           /* NEVER modify the table again! */
}
377
378 /*
379 +*****************************************************************************
380 *
381 * Function Name: ReverseRoundSubkeys
382 *
383 * Function: Reverse order of round subkeys to switch between encrypt/decrypt
384 *
385 * Arguments: key = ptr to keyInstance to be reversed
386 * newDir = new direction value
387 *
388 * Return: None.
389 *
390 * Notes:
391 * This optimization allows both blockEncrypt and blockDecrypt to use the same
392 * "fallthru" switch statement based on the number of rounds.
393 * Note that key->numRounds must be even and >= 2 here.
394 *
395 -****************************************************************************/
396 static void ReverseRoundSubkeys(keyInstance *key,BYTE newDir)
397 {
398 DWORD t0,t1;
399 register DWORD *r0=key->subKeys+ROUND_SUBKEYS;
400 register DWORD *r1=r0 + 2*key->numRounds - 2;
401
402 for (;r0 < r1;r0+=2,r1-=2)
403 {
404 t0=r0[0]; /* swap the order */
405 t1=r0[1];
406 r0[0]=r1[0]; /* but keep relative order within pairs */
407 r0[1]=r1[1];
408 r1[0]=t0;
409 r1[1]=t1;
410 }
411
412 key->direction=newDir;
413 }
414
415 /*
416 +*****************************************************************************
417 *
418 * Function Name: Xor256
419 *
420 * Function: Copy an 8-bit permutation (256 bytes), xoring with a byte
421 *
422 * Arguments: dst = where to put result
423 * src = where to get data (can be same as dst)
424 * b = byte to xor
425 *
426 * Return: None
427 *
428 * Notes:
429 * BorlandC's optimization is terrible! When we put the code inline,
430 * it generates fairly good code in the *following* segment (not in the Xor256
431 * code itself). If the call is made, the code following the call is awful!
432 * The penalty is nearly 50%! So we take the code size hit for inlining for
433 * Borland, while Microsoft happily works with a call.
434 *
435 -****************************************************************************/
436 #if defined(__BORLANDC__) /* do it inline */
437 #define Xor32(dst,src,i) { ((DWORD *)dst)[i] = ((DWORD *)src)[i] ^ tmpX; }
438 #define Xor256(dst,src,b) \
439 { \
440 register DWORD tmpX=0x01010101u * b;\
441 for (i=0;i<64;i+=4) \
442 { Xor32(dst,src,i ); Xor32(dst,src,i+1); Xor32(dst,src,i+2); Xor32(dst,src,i+3); } \
443 }
444 #else /* do it as a function call */
445 static void Xor256(void *dst,void *src,BYTE b)
446 {
447 register DWORD x=b*0x01010101u; /* replicate byte to all four bytes */
448 register DWORD *d=(DWORD *)dst;
449 register DWORD *s=(DWORD *)src;
450 #define X_8(N) { d[N]=s[N] ^ x; d[N+1]=s[N+1] ^ x; }
451 #define X_32(N) { X_8(N); X_8(N+2); X_8(N+4); X_8(N+6); }
452 X_32(0 ); X_32( 8); X_32(16); X_32(24); /* all inline */
453 d+=32; /* keep offsets small! */
454 s+=32;
455 X_32(0 ); X_32( 8); X_32(16); X_32(24); /* all inline */
456 }
457 #endif
458
/*
+*****************************************************************************
*
* Function Name:    reKey
*
* Function:         Initialize the Twofish key schedule from key32
*
* Arguments:        key =   ptr to keyInstance to be initialized
*                           (reads key32, keyLen, numRounds and direction;
*                            writes sboxKeys, subKeys and, unless ZERO_KEY
*                            is defined, the keyed S-box table _sBox_)
*
* Return:           TRUE on success
*                   BAD_ALIGN32 / BAD_KEY_INSTANCE only when both
*                   VALIDATE_PARMS and ALIGN32 are nonzero
*
* Notes:
*   Here we precompute all the round subkeys, although that is not actually
*   required.  For example, on a smartcard, the round subkeys can
*   be generated on-the-fly using f32()
*
*   Builds MDStab on the first call (see needToBuildMDS).
*   The S-box generation only handles keyLen values of 128, 192 and 256;
*   any other value leaves the S-box table untouched.
*   If key->direction is DIR_ENCRYPT the round subkeys are reversed before
*   returning.
*
-****************************************************************************/
static int reKey(keyInstance *key)
{
    int   i,j,k64Cnt,keyLen;
    int   subkeyCnt;
    DWORD A=0,B=0,q;
    DWORD sKey[MAX_KEY_BITS/64],k32e[MAX_KEY_BITS/64],k32o[MAX_KEY_BITS/64];
    BYTE  L0[256],L1[256];      /* small local 8-bit permutations */

#if VALIDATE_PARMS
  #if ALIGN32
    if (((int)key) & 3)
        return BAD_ALIGN32;
    if ((key->keyLen % 64) || (key->keyLen < MIN_KEY_BITS))
        return BAD_KEY_INSTANCE;
  #endif
#endif

    if (needToBuildMDS)         /* do this one time only */
        BuildMDS();

/* F32(res,x,k32): table-driven form of the keyed 32-bit permutation; the
   number of key-dependent stages is selected by k64Cnt.  Note that res is
   only assigned when (k64Cnt & 3) is 0, 2 or 3. */
#define F32(res,x,k32)  \
    {                                                           \
    DWORD t=x;                                                  \
    switch (k64Cnt & 3)                                         \
        {                                                       \
        case 0:  /* same as 4 */                                \
                    b0(t)   = p8(04)[b0(t)] ^ b0(k32[3]);       \
                    b1(t)   = p8(14)[b1(t)] ^ b1(k32[3]);       \
                    b2(t)   = p8(24)[b2(t)] ^ b2(k32[3]);       \
                    b3(t)   = p8(34)[b3(t)] ^ b3(k32[3]);       \
                 /* fall thru, having pre-processed t */        \
        case 3:     b0(t)   = p8(03)[b0(t)] ^ b0(k32[2]);       \
                    b1(t)   = p8(13)[b1(t)] ^ b1(k32[2]);       \
                    b2(t)   = p8(23)[b2(t)] ^ b2(k32[2]);       \
                    b3(t)   = p8(33)[b3(t)] ^ b3(k32[2]);       \
                 /* fall thru, having pre-processed t */        \
        case 2:  /* 128-bit keys (optimize for this case) */    \
            res=    MDStab[0][p8(01)[p8(02)[b0(t)] ^ b0(k32[1])] ^ b0(k32[0])] ^    \
                    MDStab[1][p8(11)[p8(12)[b1(t)] ^ b1(k32[1])] ^ b1(k32[0])] ^    \
                    MDStab[2][p8(21)[p8(22)[b2(t)] ^ b2(k32[1])] ^ b2(k32[0])] ^    \
                    MDStab[3][p8(31)[p8(32)[b3(t)] ^ b3(k32[1])] ^ b3(k32[0])] ;    \
        }                                                       \
    }


    /* The block below is skipped only when CHECK_TABLE is 0, USE_ASM is
       defined and bit 2 of useAsm is set. */
#if !CHECK_TABLE
#if defined(USE_ASM)                /* only do this if not using assembler */
    if (!(useAsm & 4))
#endif
#endif
    {
        subkeyCnt = ROUND_SUBKEYS + 2*key->numRounds;
        keyLen=key->keyLen;
        k64Cnt=(keyLen+63)/64;          /* number of 64-bit key words */
        for (i=0,j=k64Cnt-1;i<k64Cnt;i++,j--)
        {                               /* split into even/odd key dwords */
            k32e[i]=key->key32[2*i  ];
            k32o[i]=key->key32[2*i+1];
            /* compute S-box keys using (12,8) Reed-Solomon code over GF(256) */
            sKey[j]=key->sboxKeys[j]=RS_MDS_Encode(k32e[i],k32o[i]);    /* reverse order */
        }
    }

#ifdef USE_ASM
    if (useAsm & 4)
    {
    #if defined(COMPILE_KEY) && defined(USE_ASM)
        key->keySig     = VALID_SIG;                /* show that we are initialized */
        key->codeSize   = sizeof(key->compiledCode);    /* set size */
    #endif
        reKey_86(key);
    }
    else
#endif
    {
        /* each iteration produces one pair of subkeys: subKeys[2*i], subKeys[2*i+1] */
        for (i=q=0;i<subkeyCnt/2;i++,q+=SK_STEP)
        {                               /* compute round subkeys for PHT */
            F32(A,q        ,k32e);      /* A uses even key dwords */
            F32(B,q+SK_BUMP,k32o);      /* B uses odd  key dwords */
            B = ROL(B,8);
            key->subKeys[2*i  ] = A+B;  /* combine with a PHT */
            B = A + 2*B;
            key->subKeys[2*i+1] = ROL(B,SK_ROTL);
        }
#if !defined(ZERO_KEY)
        /* Build the keyed S-box table via sbSet()/Xor256(), using L0/L1 as
           scratch permutations.  There is no default case. */
        switch (keyLen)                 /* case out key length for speed in generating S-boxes */
        {
            case 128:
            #if defined(FULL_KEY) || defined(PART_KEY)
#if BIG_TAB
                #define one128(N,J) sbSet(N,i,J,L0[i+J])
                #define sb128(N) {                      \
                    BYTE *qq=bigTab[N][b##N(sKey[1])];  \
                    Xor256(L0,qq,b##N(sKey[0]));        \
                    for (i=0;i<256;i+=2) { one128(N,0); one128(N,1); } }
#else
                #define one128(N,J) sbSet(N,i,J,p8(N##1)[L0[i+J]]^k0)
                #define sb128(N) {                  \
                    Xor256(L0,p8(N##2),b##N(sKey[1]));  \
                    { register DWORD k0=b##N(sKey[0]);  \
                    for (i=0;i<256;i+=2) { one128(N,0); one128(N,1); } } }
#endif
            #elif defined(MIN_KEY)
                #define sb128(N) Xor256(_sBox8_(N),p8(N##2),b##N(sKey[1]))
            #endif
                sb128(0); sb128(1); sb128(2); sb128(3);
                break;
            case 192:
            #if defined(FULL_KEY) || defined(PART_KEY)
                #define one192(N,J) sbSet(N,i,J,p8(N##1)[p8(N##2)[L0[i+J]]^k1]^k0)
                #define sb192(N) {                      \
                    Xor256(L0,p8(N##3),b##N(sKey[2]));  \
                    { register DWORD k0=b##N(sKey[0]);  \
                      register DWORD k1=b##N(sKey[1]);  \
                      for (i=0;i<256;i+=2) { one192(N,0); one192(N,1); } } }
            #elif defined(MIN_KEY)
                #define one192(N,J) sbSet(N,i,J,p8(N##2)[L0[i+J]]^k1)
                #define sb192(N) {                      \
                    Xor256(L0,p8(N##3),b##N(sKey[2]));  \
                    { register DWORD k1=b##N(sKey[1]);  \
                      for (i=0;i<256;i+=2) { one192(N,0); one192(N,1); } } }
            #endif
                sb192(0); sb192(1); sb192(2); sb192(3);
                break;
            case 256:
            #if defined(FULL_KEY) || defined(PART_KEY)
                #define one256(N,J) sbSet(N,i,J,p8(N##1)[p8(N##2)[L0[i+J]]^k1]^k0)
                #define sb256(N) {                                      \
                    Xor256(L1,p8(N##4),b##N(sKey[3]));                  \
                    for (i=0;i<256;i+=2) {L0[i  ]=p8(N##3)[L1[i]];      \
                                          L0[i+1]=p8(N##3)[L1[i+1]]; }  \
                    Xor256(L0,L0,b##N(sKey[2]));                        \
                    { register DWORD k0=b##N(sKey[0]);                  \
                      register DWORD k1=b##N(sKey[1]);                  \
                      for (i=0;i<256;i+=2) { one256(N,0); one256(N,1); } } }
            #elif defined(MIN_KEY)
                #define one256(N,J) sbSet(N,i,J,p8(N##2)[L0[i+J]]^k1)
                #define sb256(N) {                                      \
                    Xor256(L1,p8(N##4),b##N(sKey[3]));                  \
                    for (i=0;i<256;i+=2) {L0[i  ]=p8(N##3)[L1[i]];      \
                                          L0[i+1]=p8(N##3)[L1[i+1]]; }  \
                    Xor256(L0,L0,b##N(sKey[2]));                        \
                    { register DWORD k1=b##N(sKey[1]);                  \
                      for (i=0;i<256;i+=2) { one256(N,0); one256(N,1); } } }
            #endif
                sb256(0); sb256(1); sb256(2); sb256(3);
                break;
        }
#endif
    }

#if CHECK_TABLE                     /* sanity check  vs. pedagogical code*/
    {
        GetSboxKey;
        /* recompute every subkey pair with the slow f32() and compare */
        for (i=0;i<subkeyCnt/2;i++)
        {
            A = f32(i*SK_STEP        ,k32e,keyLen); /* A uses even key dwords */
            B = f32(i*SK_STEP+SK_BUMP,k32o,keyLen); /* B uses odd  key dwords */
            B = ROL(B,8);
            assert(key->subKeys[2*i  ] == A+  B);
            assert(key->subKeys[2*i+1] == ROL(A+2*B,SK_ROTL));
        }
  #if !defined(ZERO_KEY)            /* any S-boxes to check? */
        for (i=q=0;i<256;i++,q+=0x01010101)
            assert(f32(q,key->sboxKeys,keyLen) == Fe32_(q,0));
  #endif
    }
#endif /* CHECK_TABLE */

    /* the subkeys were just generated in forward order; swap them end-for-end
       when the key is marked for encryption */
    if (key->direction == DIR_ENCRYPT)
        ReverseRoundSubkeys(key,DIR_ENCRYPT);   /* reverse the round subkey order */

    return TRUE;
}
650 /*
651 +*****************************************************************************
652 *
653 * Function Name: makeKey
654 *
655 * Function: Initialize the Twofish key schedule
656 *
657 * Arguments: key = ptr to keyInstance to be initialized
658 * direction = DIR_ENCRYPT or DIR_DECRYPT
659 * keyLen = # bits of key text at *keyMaterial
660 * keyMaterial = ptr to hex ASCII chars representing key bits
661 *
662 * Return: TRUE on success
663 * else error code (e.g., BAD_KEY_DIR)
664 *
665 * Notes: This parses the key bits from keyMaterial. Zeroes out unused key bits
666 *
667 -****************************************************************************/
668 static int makeKey(keyInstance *key, BYTE direction, int keyLen,CONST char *keyMaterial)
669 {
670 int i;
671
672 #if VALIDATE_PARMS /* first, sanity check on parameters */
673 if (key == NULL)
674 return BAD_KEY_INSTANCE;/* must have a keyInstance to initialize */
675 if ((direction != DIR_ENCRYPT) && (direction != DIR_DECRYPT))
676 return BAD_KEY_DIR; /* must have valid direction */
677 if ((keyLen > MAX_KEY_BITS) || (keyLen < 8) || (keyLen & 0x3F))
678 return BAD_KEY_MAT; /* length must be valid */
679 key->keySig = VALID_SIG; /* show that we are initialized */
680 #if ALIGN32
681 if ((((int)key) & 3) || (((int)key->key32) & 3))
682 return BAD_ALIGN32;
683 #endif
684 #endif
685
686 key->direction = direction;/* set our cipher direction */
687 key->keyLen = (keyLen+63) & ~63; /* round up to multiple of 64 */
688 key->numRounds = numRounds[(keyLen-1)/64];
689 memset(key->key32,0,sizeof(key->key32)); /* zero unused bits */
690
691 if (keyMaterial == NULL)
692 return TRUE; /* allow a "dummy" call */
693
694 for (i=0;i<keyLen/32;i++) /* make byte-oriented copy for CFB1 */
695 key->key32[i] = (((unsigned char *)keyMaterial)[i*4+0] << 0)
696 | (((unsigned char *)keyMaterial)[i*4+1] << 8)
697 | (((unsigned char *)keyMaterial)[i*4+2] << 16)
698 | (((unsigned char *)keyMaterial)[i*4+3] << 24);
699
700 return reKey(key); /* generate round subkeys */
701 }
702
703
704 /*
705 +*****************************************************************************
706 *
707 * Function Name: cipherInit
708 *
709 * Function: Initialize the Twofish cipher in a given mode
710 *
711 * Arguments: cipher = ptr to cipherInstance to be initialized
712 * mode = MODE_ECB, MODE_CBC, or MODE_CFB1
713 * IV = ptr to hex ASCII test representing IV bytes
714 *
715 * Return: TRUE on success
716 * else error code (e.g., BAD_CIPHER_MODE)
717 *
718 -****************************************************************************/
719 static int cipherInit(cipherInstance *cipher, BYTE mode,CONST char *IV)
720 {
721 int i;
722 #if VALIDATE_PARMS /* first, sanity check on parameters */
723 if (cipher == NULL)
724 return BAD_PARAMS; /* must have a cipherInstance to initialize */
725 if ((mode != MODE_ECB) && (mode != MODE_CBC) && (mode != MODE_CFB1))
726 return BAD_CIPHER_MODE; /* must have valid cipher mode */
727 cipher->cipherSig = VALID_SIG;
728 #if ALIGN32
729 if ((((int)cipher) & 3) || (((int)cipher->IV) & 3) || (((int)cipher->iv32) & 3))
730 return BAD_ALIGN32;
731 #endif
732 #endif
733
734 if ((mode != MODE_ECB) && (IV)) /* parse the IV */
735 {
736 memcpy (cipher->iv32, IV, BLOCK_SIZE/32);
737 for (i=0;i<BLOCK_SIZE/32;i++) /* make byte-oriented copy for CFB1 */
738 ((DWORD *)cipher->IV)[i] = Bswap(cipher->iv32[i]);
739 }
740
741 cipher->mode = mode;
742
743 return TRUE;
744 }
745
/*
+*****************************************************************************
*
* Function Name:    blockEncrypt
*
* Function:         Encrypt block(s) of data using Twofish
*
* Arguments:        cipher      =   ptr to already initialized cipherInstance
*                   key         =   ptr to already initialized keyInstance
*                   input       =   ptr to data blocks to be encrypted
*                   inputLen    =   # bits to encrypt (multiple of blockSize)
*                   outBuffer   =   ptr to where to put encrypted blocks
*
* Return:           # bits ciphered (>= 0)
*                   else error code (e.g., BAD_CIPHER_STATE, BAD_KEY_MATERIAL);
*                   the parameter checks are only compiled in when
*                   VALIDATE_PARMS is nonzero
*
* Notes: The only supported block size for ECB/CBC modes is BLOCK_SIZE bits.
*        If inputLen is not a multiple of BLOCK_SIZE bits in those modes,
*        an error BAD_INPUT_LEN is returned.  In CFB1 mode, all block
*        sizes can be supported.
*
*        Side effects: in ECB/CBC the round subkeys in *key are reversed
*        (and key->direction set to DIR_ENCRYPT) if the key is not already
*        in the encrypt direction; in CBC cipher->iv32 is updated with the
*        last ciphertext block; in CFB1 cipher->IV is shifted one bit per
*        processed bit and cipher->mode is temporarily set to MODE_ECB.
*
*        Unless ZERO_KEY is defined, the round code below is unrolled for
*        exactly 16 rounds and does not consult key->numRounds.
*
-****************************************************************************/
static int blockEncrypt(cipherInstance *cipher, keyInstance *key,CONST BYTE *input,
                int inputLen, BYTE *outBuffer)
{
    int   i,n;                      /* loop counters */
    DWORD x[BLOCK_SIZE/32];         /* block being encrypted */
    DWORD t0,t1;                    /* temp variables */
    int   rounds=key->numRounds;    /* number of rounds */
    BYTE  bit,bit0,ctBit,carry;     /* temps for CFB */

    /* make local copies of things for faster access */
    int   mode = cipher->mode;
    DWORD sk[TOTAL_SUBKEYS];
    DWORD IV[BLOCK_SIZE/32];

    GetSboxKey;

#if VALIDATE_PARMS
    if ((cipher == NULL) || (cipher->cipherSig != VALID_SIG))
        return BAD_CIPHER_STATE;
    if ((key == NULL) || (key->keySig != VALID_SIG))
        return BAD_KEY_INSTANCE;
    if ((rounds < 2) || (rounds > MAX_ROUNDS) || (rounds&1))
        return BAD_KEY_INSTANCE;
    if ((mode != MODE_CFB1) && (inputLen % BLOCK_SIZE))
        return BAD_INPUT_LEN;
  #if ALIGN32
    if ( (((int)cipher) & 3) || (((int)key      ) & 3) ||
         (((int)input ) & 3) || (((int)outBuffer) & 3))
        return BAD_ALIGN32;
  #endif
#endif

    if (mode == MODE_CFB1)
    {   /* use recursion here to handle CFB, one block at a time */
        cipher->mode = MODE_ECB;    /* do encryption in ECB */
        for (n=0;n<inputLen;n++)    /* n counts bits, most significant bit of each byte first */
        {
            /* encrypt the current IV; only the top bit of the first output byte is used */
            blockEncrypt(cipher,key,cipher->IV,BLOCK_SIZE,(BYTE *)x);
            bit0  = 0x80 >> (n & 7);/* which bit position in byte */
            ctBit = (input[n/8] & bit0) ^ ((((BYTE *) x)[0] & 0x80) >> (n&7));
            outBuffer[n/8] = (outBuffer[n/8] & ~ bit0) | ctBit;
            carry = ctBit >> (7 - (n&7));   /* ciphertext bit as 0/1 */
            /* shift the whole IV left by one bit, feeding the ciphertext bit in at the end */
            for (i=BLOCK_SIZE/8-1;i>=0;i--)
            {
                bit = cipher->IV[i] >> 7;   /* save next "carry" from shift */
                cipher->IV[i] = (cipher->IV[i] << 1) ^ carry;
                carry = bit;
            }
        }
        cipher->mode = MODE_CFB1;   /* restore mode for next time */
        return inputLen;
    }

    /* here for ECB, CBC modes */
    if (key->direction != DIR_ENCRYPT)
        ReverseRoundSubkeys(key,DIR_ENCRYPT);   /* reverse the round subkey order */

#ifdef USE_ASM
    /* hand the whole request to the assembly routine when enabled */
    if ((useAsm & 1) && (inputLen))
  #ifdef COMPILE_KEY
        if (key->keySig == VALID_SIG)
            return ((CipherProc *)(key->encryptFuncPtr))(cipher,key,input,inputLen,outBuffer);
  #else
        return (*blockEncrypt_86)(cipher,key,input,inputLen,outBuffer);
  #endif
#endif
    /* make local copy of subkeys for speed */
    memcpy(sk,key->subKeys,sizeof(DWORD)*(ROUND_SUBKEYS+2*rounds));
    if (mode == MODE_CBC)
        BlockCopy(IV,cipher->iv32)
    else
        IV[0]=IV[1]=IV[2]=IV[3]=0;  /* ECB: xoring with a zero IV is a no-op */

    for (n=0;n<inputLen;n+=BLOCK_SIZE,input+=BLOCK_SIZE/8,outBuffer+=BLOCK_SIZE/8)
    {
/* load one input dword, applying input whitening and the chaining value */
#define LoadBlockE(N)  x[N]=Bswap(((DWORD *)input)[N]) ^ sk[INPUT_WHITEN+N] ^ IV[N]
        LoadBlockE(0);  LoadBlockE(1);  LoadBlockE(2);  LoadBlockE(3);
/* EncryptRound(K,R,id): one round using round subkey pair R;
   Encrypt2(R,id): two consecutive rounds, R+1 first and then R */
#define EncryptRound(K,R,id)    \
            t0     = Fe32##id(x[K  ],0);                    \
            t1     = Fe32##id(x[K^1],3);                    \
            x[K^3] = ROL(x[K^3],1);                         \
            x[K^2]^= t0 +   t1 + sk[ROUND_SUBKEYS+2*(R)  ]; \
            x[K^3]^= t0 + 2*t1 + sk[ROUND_SUBKEYS+2*(R)+1]; \
            x[K^2] = ROR(x[K^2],1);
#define     Encrypt2(R,id)  { EncryptRound(0,R+1,id); EncryptRound(2,R,id); }

#if defined(ZERO_KEY)
        switch (key->keyLen)
        {
            case 128:
                for (i=rounds-2;i>=0;i-=2)
                    Encrypt2(i,_128);
                break;
            case 192:
                for (i=rounds-2;i>=0;i-=2)
                    Encrypt2(i,_192);
                break;
            case 256:
                for (i=rounds-2;i>=0;i-=2)
                    Encrypt2(i,_256);
                break;
        }
#else
        /* fully unrolled: 8 x 2 rounds, highest subkey index first */
        Encrypt2(14,_);
        Encrypt2(12,_);
        Encrypt2(10,_);
        Encrypt2( 8,_);
        Encrypt2( 6,_);
        Encrypt2( 4,_);
        Encrypt2( 2,_);
        Encrypt2( 0,_);
#endif

        /* need to do (or undo, depending on your point of view) final swap */
#if LittleEndian
#define StoreBlockE(N)  ((DWORD *)outBuffer)[N]=x[N^2] ^ sk[OUTPUT_WHITEN+N]
#else
#define StoreBlockE(N)  { t0=x[N^2] ^ sk[OUTPUT_WHITEN+N]; ((DWORD *)outBuffer)[N]=Bswap(t0); }
#endif
        StoreBlockE(0); StoreBlockE(1); StoreBlockE(2); StoreBlockE(3);
        if (mode == MODE_CBC)
        {   /* the ciphertext just written becomes the chaining value for the next block */
            IV[0]=Bswap(((DWORD *)outBuffer)[0]);
            IV[1]=Bswap(((DWORD *)outBuffer)[1]);
            IV[2]=Bswap(((DWORD *)outBuffer)[2]);
            IV[3]=Bswap(((DWORD *)outBuffer)[3]);
        }
    }

    if (mode == MODE_CBC)
        BlockCopy(cipher->iv32,IV);     /* remember the chaining value for the next call */

    return inputLen;
}
902
903 /*
904 +*****************************************************************************
905 *
906 * Function Name: blockDecrypt
907 *
908 * Function: Decrypt block(s) of data using Twofish
909 *
910 * Arguments: cipher = ptr to already initialized cipherInstance
911 * key = ptr to already initialized keyInstance
912 * input = ptr to data blocks to be decrypted
913 * inputLen = # bits to encrypt (multiple of blockSize)
914 * outBuffer = ptr to where to put decrypted blocks
915 *
916 * Return: # bits ciphered (>= 0)
917 * else error code (e.g., BAD_CIPHER_STATE, BAD_KEY_MATERIAL)
918 *
919 * Notes: The only supported block size for ECB/CBC modes is BLOCK_SIZE bits.
920 * If inputLen is not a multiple of BLOCK_SIZE bits in those modes,
921 * an error BAD_INPUT_LEN is returned. In CFB1 mode, all block
922 * sizes can be supported.
923 *
924 -****************************************************************************/
925 static int blockDecrypt(cipherInstance *cipher, keyInstance *key,CONST BYTE *input,
926 int inputLen, BYTE *outBuffer)
927 {
928 int i,n; /* loop counters */
929 DWORD x[BLOCK_SIZE/32]; /* block being encrypted */
930 DWORD t0,t1; /* temp variables */
931 int rounds=key->numRounds; /* number of rounds */
932 BYTE bit,bit0,ctBit,carry; /* temps for CFB */
933
934 /* make local copies of things for faster access */
935 int mode = cipher->mode;
936 DWORD sk[TOTAL_SUBKEYS];
937 DWORD IV[BLOCK_SIZE/32];
938
939 GetSboxKey;
940
941 #if VALIDATE_PARMS
942 if ((cipher == NULL) || (cipher->cipherSig != VALID_SIG))
943 return BAD_CIPHER_STATE;
944 if ((key == NULL) || (key->keySig != VALID_SIG))
945 return BAD_KEY_INSTANCE;
946 if ((rounds < 2) || (rounds > MAX_ROUNDS) || (rounds&1))
947 return BAD_KEY_INSTANCE;
948 if ((cipher->mode != MODE_CFB1) && (inputLen % BLOCK_SIZE))
949 return BAD_INPUT_LEN;
950 #if ALIGN32
951 if ( (((int)cipher) & 3) || (((int)key ) & 3) ||
952 (((int)input) & 3) || (((int)outBuffer) & 3))
953 return BAD_ALIGN32;
954 #endif
955 #endif
956
957 if (cipher->mode == MODE_CFB1)
958 { /* use blockEncrypt here to handle CFB, one block at a time */
959 cipher->mode = MODE_ECB; /* do encryption in ECB */
960 for (n=0;n<inputLen;n++)
961 {
962 blockEncrypt(cipher,key,cipher->IV,BLOCK_SIZE,(BYTE *)x);
963 bit0 = 0x80 >> (n & 7);
964 ctBit = input[n/8] & bit0;
965 outBuffer[n/8] = (outBuffer[n/8] & ~ bit0) |
966 (ctBit ^ ((((BYTE *) x)[0] & 0x80) >> (n&7)));
967 carry = ctBit >> (7 - (n&7));
968 for (i=BLOCK_SIZE/8-1;i>=0;i--)
969 {
970 bit = cipher->IV[i] >> 7; /* save next "carry" from shift */
971 cipher->IV[i] = (cipher->IV[i] << 1) ^ carry;
972 carry = bit;
973 }
974 }
975 cipher->mode = MODE_CFB1; /* restore mode for next time */
976 return inputLen;
977 }
978
979 /* here for ECB, CBC modes */
980 if (key->direction != DIR_DECRYPT)
981 ReverseRoundSubkeys(key,DIR_DECRYPT); /* reverse the round subkey order */
982 #ifdef USE_ASM
983 if ((useAsm & 2) && (inputLen))
984 #ifdef COMPILE_KEY
985 if (key->keySig == VALID_SIG)
986 return ((CipherProc *)(key->decryptFuncPtr))(cipher,key,input,inputLen,outBuffer);
987 #else
988 return (*blockDecrypt_86)(cipher,key,input,inputLen,outBuffer);
989 #endif
990 #endif
991 /* make local copy of subkeys for speed */
992 memcpy(sk,key->subKeys,sizeof(DWORD)*(ROUND_SUBKEYS+2*rounds));
993 if (mode == MODE_CBC)
994 BlockCopy(IV,cipher->iv32)
995 else
996 IV[0]=IV[1]=IV[2]=IV[3]=0;
997
998 for (n=0;n<inputLen;n+=BLOCK_SIZE,input+=BLOCK_SIZE/8,outBuffer+=BLOCK_SIZE/8)
999 {
1000 #define LoadBlockD(N) x[N^2]=Bswap(((DWORD *)input)[N]) ^ sk[OUTPUT_WHITEN+N]
1001 LoadBlockD(0); LoadBlockD(1); LoadBlockD(2); LoadBlockD(3);
1002
1003 #define DecryptRound(K,R,id) \
1004 t0 = Fe32##id(x[K ],0); \
1005 t1 = Fe32##id(x[K^1],3); \
1006 x[K^2] = ROL (x[K^2],1); \
1007 x[K^2]^= t0 + t1 + sk[ROUND_SUBKEYS+2*(R) ]; \
1008 x[K^3]^= t0 + 2*t1 + sk[ROUND_SUBKEYS+2*(R)+1]; \
1009 x[K^3] = ROR (x[K^3],1);
1010
1011 #define Decrypt2(R,id) { DecryptRound(2,R+1,id); DecryptRound(0,R,id); }
1012
1013 #if defined(ZERO_KEY)
1014 switch (key->keyLen)
1015 {
1016 case 128:
1017 for (i=rounds-2;i>=0;i-=2)
1018 Decrypt2(i,_128);
1019 break;
1020 case 192:
1021 for (i=rounds-2;i>=0;i-=2)
1022 Decrypt2(i,_192);
1023 break;
1024 case 256:
1025 for (i=rounds-2;i>=0;i-=2)
1026 Decrypt2(i,_256);
1027 break;
1028 }
1029 #else
1030 {
1031 Decrypt2(14,_);
1032 Decrypt2(12,_);
1033 Decrypt2(10,_);
1034 Decrypt2( 8,_);
1035 Decrypt2( 6,_);
1036 Decrypt2( 4,_);
1037 Decrypt2( 2,_);
1038 Decrypt2( 0,_);
1039 }
1040 #endif
1041 if (cipher->mode == MODE_ECB)
1042 {
1043 #if LittleEndian
1044 #define StoreBlockD(N) ((DWORD *)outBuffer)[N] = x[N] ^ sk[INPUT_WHITEN+N]
1045 #else
1046 #define StoreBlockD(N) { t0=x[N]^sk[INPUT_WHITEN+N]; ((DWORD *)outBuffer)[N] = Bswap(t0); }
1047 #endif
1048 StoreBlockD(0); StoreBlockD(1); StoreBlockD(2); StoreBlockD(3);
1049 #undef StoreBlockD
1050 continue;
1051 }
1052 else
1053 {
1054 #define StoreBlockD(N) x[N] ^= sk[INPUT_WHITEN+N] ^ IV[N]; \
1055 IV[N] = Bswap(((DWORD *)input)[N]); \
1056 ((DWORD *)outBuffer)[N] = Bswap(x[N]);
1057 StoreBlockD(0); StoreBlockD(1); StoreBlockD(2); StoreBlockD(3);
1058 #undef StoreBlockD
1059 }
1060 }
1061 if (mode == MODE_CBC) /* restore iv32 to cipher */
1062 BlockCopy(cipher->iv32,IV)
1063
1064 return inputLen;
1065 }
1066
#ifdef GetCodeSize
/*
 * TwofishCodeSize -- report a size figure for the Twofish code.
 *
 * Return: TwofishAsmCodeSize() when built with USE_ASM and either of the
 *         two low bits of useAsm is set; otherwise the difference between
 *         Here(0), evaluated at the top of this function, and
 *         TwofishCodeStart().
 *
 * NOTE(review): Here() and TwofishCodeStart() are defined elsewhere;
 * presumably they yield code addresses bracketing the C implementation --
 * confirm against their definitions.
 */
static DWORD TwofishCodeSize(void)
	{
	DWORD x= Here(0);
#ifdef USE_ASM
	if (useAsm & 3)
		return TwofishAsmCodeSize();
#endif
	return x - TwofishCodeStart();
	}
#endif