ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Crypt-Twofish2/twofish.c
Revision: 1.3
Committed: Sun Aug 1 12:32:06 2021 UTC (3 years, 3 months ago) by root
Content type: text/plain
Branch: MAIN
Changes since 1.2: +25 -25 lines
Log Message:
whitespace

File Contents

# Content
1 /***************************************************************************
2 TWOFISH2.C -- Optimized C API calls for TWOFISH AES submission
3
4 Submitters:
5 Bruce Schneier, Counterpane Systems
6 Doug Whiting, Hi/fn
7 John Kelsey, Counterpane Systems
8 Chris Hall, Counterpane Systems
9 David Wagner, UC Berkeley
10
11 Code Author: Doug Whiting, Hi/fn
12
13 Version 1.00 April 1998
14
15 Copyright 1998, Hi/fn and Counterpane Systems. All rights reserved.
16
17 Notes:
18 * Optimized version
19 * Tab size is set to 4 characters in this file
20
21 ***************************************************************************/
22 #include "aes.h"
23 #include "table.h"
24
25 #include <memory.h>
26 /*#include <assert.h>*/
27
/* Accept lower-case spellings of the keying-mode build switches: when one of
   min_key / part_key / zero_key is defined and its upper-case counterpart is
   not, define the upper-case macro that the rest of this file tests.
   Only the first matching branch of the chain takes effect. */
28 #if defined(min_key) && !defined(MIN_KEY)
29 #define MIN_KEY 1 /* upper-case alias of min_key */
30 #elif defined(part_key) && !defined(PART_KEY)
31 #define PART_KEY 1
32 #elif defined(zero_key) && !defined(ZERO_KEY)
33 #define ZERO_KEY 1
34 #endif
35
36
/* Declarations for the optional assembly back end.  Nothing here is defined
   in this file; the symbols must be supplied elsewhere when USE_ASM is set.
   In this file useAsm is tested bitwise: bit 0 (1) selects the ASM encrypt
   path, bit 1 (2) the ASM decrypt path, bit 2 (4) the ASM key setup. */
37 #ifdef USE_ASM
38 extern int useAsm; /* ok to use ASM code? */
39
/* signature shared by the ASM block encrypt/decrypt entry points */
40 typedef int cdecl CipherProc
41 (cipherInstance *cipher, keyInstance *key,BYTE *input,int inputLen,BYTE *outBuffer);
/* signature of the ASM key-schedule entry point */
42 typedef int cdecl KeySetupProc(keyInstance *key);
43
44 extern CipherProc *blockEncrypt_86; /* ptr to ASM functions */
45 extern CipherProc *blockDecrypt_86;
46 extern KeySetupProc *reKey_86;
47 extern DWORD cdecl TwofishAsmCodeSize(void);
48 #endif
49
50 /*
51 +*****************************************************************************
52 * Constants/Macros/Tables
53 -****************************************************************************/
54
/* CONST expands to nothing, so every "CONST" object below is in fact writable. */
55 #define CONST /* help syntax from C++, NOP here */
56
/* Key-independent MDS lookup table, filled by BuildMDS(); needToBuildMDS is
   cleared by BuildMDS() once the table has been built. */
57 static CONST fullSbox MDStab; /* not actually const. Initialized ONE time */
58 static int needToBuildMDS=1; /* is MDStab initialized yet? */
59
/* BIG_TAB selects an extra 4*256*256-byte table (filled in BuildMDS) that is
   used by the 128-bit key path of reKey; it is disabled here. */
60 #define BIG_TAB 0
61
62 #if BIG_TAB
63 static BYTE bigTab[4][256][256]; /* pre-computed S-box */
64 #endif
65
66 /* number of rounds for various key sizes: 128, 192, 256 */
67 /* (ignored for now in optimized code!) */
/* makeKey indexes this table with (keyLen-1)/64, so entry 0 covers key
   lengths of 64 bits or less. */
68 static CONST int numRounds[4]= {0,ROUNDS_128,ROUNDS_192,ROUNDS_256};
69
/* Location of the key-dependent S-box.  With REENTRANT it lives inside the
   keyInstance named "key" at the point of use; otherwise it is one file-scope
   table, so it is overwritten each time reKey runs. */
70 #if REENTRANT
71 #define _sBox_ key->sBox8x32
72 #else
73 static fullSbox _sBox_; /* permuted MDStab based on keys */
74 #endif
/* byte-wise view of _sBox_: the N-th run of 256 bytes */
75 #define _sBox8_(N) (((BYTE *) _sBox_) + (N)*256)
76
77 /*------- see what level of S-box precomputation we need to do -----*/
/* Each keying mode below defines the same small macro interface:
     MOD_STRING   - text describing the selected mode
     Fe32_...     - keyed S-box + MDS lookup on the bytes of x, starting at
                    byte position R (byte selection is done by _b(), which is
                    not defined in this file)
     sbSet        - store one key-dependent S-box entry (absent for ZERO_KEY)
     GetSboxKey   - local declarations the cipher routines need for Fe32_
   The modes differ in how much of the S-box is precomputed in reKey versus
   recomputed on every Fe32_ evaluation. */
77 /*------- see what level of S-box precomputation we need to do -----*/
/* ZERO_KEY: nothing is precomputed; every lookup runs the full p8/key-byte
   chain, so there is one Fe32 variant per key length (2, 3 or 4 key dwords). */
78 #if defined(ZERO_KEY)
79 #define MOD_STRING "(Zero S-box keying)"
80 #define Fe32_128(x,R) \
81 ( MDStab[0][p8(01)[p8(02)[_b(x,R )]^b0(SKEY[1])]^b0(SKEY[0])] ^ \
82 MDStab[1][p8(11)[p8(12)[_b(x,R+1)]^b1(SKEY[1])]^b1(SKEY[0])] ^ \
83 MDStab[2][p8(21)[p8(22)[_b(x,R+2)]^b2(SKEY[1])]^b2(SKEY[0])] ^ \
84 MDStab[3][p8(31)[p8(32)[_b(x,R+3)]^b3(SKEY[1])]^b3(SKEY[0])] )
85 #define Fe32_192(x,R) \
86 ( MDStab[0][p8(01)[p8(02)[p8(03)[_b(x,R )]^b0(SKEY[2])]^b0(SKEY[1])]^b0(SKEY[0])] ^ \
87 MDStab[1][p8(11)[p8(12)[p8(13)[_b(x,R+1)]^b1(SKEY[2])]^b1(SKEY[1])]^b1(SKEY[0])] ^ \
88 MDStab[2][p8(21)[p8(22)[p8(23)[_b(x,R+2)]^b2(SKEY[2])]^b2(SKEY[1])]^b2(SKEY[0])] ^ \
89 MDStab[3][p8(31)[p8(32)[p8(33)[_b(x,R+3)]^b3(SKEY[2])]^b3(SKEY[1])]^b3(SKEY[0])] )
90 #define Fe32_256(x,R) \
91 ( MDStab[0][p8(01)[p8(02)[p8(03)[p8(04)[_b(x,R )]^b0(SKEY[3])]^b0(SKEY[2])]^b0(SKEY[1])]^b0(SKEY[0])] ^ \
92 MDStab[1][p8(11)[p8(12)[p8(13)[p8(14)[_b(x,R+1)]^b1(SKEY[3])]^b1(SKEY[2])]^b1(SKEY[1])]^b1(SKEY[0])] ^ \
93 MDStab[2][p8(21)[p8(22)[p8(23)[p8(24)[_b(x,R+2)]^b2(SKEY[3])]^b2(SKEY[2])]^b2(SKEY[1])]^b2(SKEY[0])] ^ \
94 MDStab[3][p8(31)[p8(32)[p8(33)[p8(34)[_b(x,R+3)]^b3(SKEY[3])]^b3(SKEY[2])]^b3(SKEY[1])]^b3(SKEY[0])] )
95
/* declares SKEY[4] and copies key->sboxKeys into it; expands to a declaration
   plus a statement, so it must appear where both are allowed */
96 #define GetSboxKey DWORD SKEY[4]; /* local copy */ \
97 memcpy(SKEY,key->sboxKeys,sizeof(SKEY));
98 /*----------------------------------------------------------------*/
/* MIN_KEY: the byte table _sBox8_(N) holds a partially keyed permutation; the
   last p8 stage and the xor with sboxKeys[0] are applied at lookup time. */
99 #elif defined(MIN_KEY)
100 #define MOD_STRING "(Minimal keying)"
101 #define Fe32_(x,R)(MDStab[0][p8(01)[_sBox8_(0)[_b(x,R )]] ^ b0(SKEY0)] ^ \
102 MDStab[1][p8(11)[_sBox8_(1)[_b(x,R+1)]] ^ b1(SKEY0)] ^ \
103 MDStab[2][p8(21)[_sBox8_(2)[_b(x,R+2)]] ^ b2(SKEY0)] ^ \
104 MDStab[3][p8(31)[_sBox8_(3)[_b(x,R+3)]] ^ b3(SKEY0)])
105 #define sbSet(N,i,J,v) { _sBox8_(N)[i+J] = v; }
106 #define GetSboxKey DWORD SKEY0 = key->sboxKeys[0] /* local copy */
107 /*----------------------------------------------------------------*/
/* PART_KEY: _sBox8_(N) holds the fully keyed 8-bit permutation; only the
   MDStab lookup remains at lookup time. */
108 #elif defined(PART_KEY)
109 #define MOD_STRING "(Partial keying)"
110 #define Fe32_(x,R)(MDStab[0][_sBox8_(0)[_b(x,R )]] ^ \
111 MDStab[1][_sBox8_(1)[_b(x,R+1)]] ^ \
112 MDStab[2][_sBox8_(2)[_b(x,R+2)]] ^ \
113 MDStab[3][_sBox8_(3)[_b(x,R+3)]])
114 #define sbSet(N,i,J,v) { _sBox8_(N)[i+J] = v; }
115 #define GetSboxKey
116 /*----------------------------------------------------------------*/
/* FULL_KEY (default): _sBox_ holds complete 32-bit results (MDStab already
   applied by sbSet), so Fe32_ is four table reads and three xors. */
117 #else /* default is FULL_KEY */
118 #ifndef FULL_KEY
119 #define FULL_KEY 1
120 #endif
121 #if BIG_TAB
122 #define TAB_STR " (Big table)"
123 #else
124 #define TAB_STR
125 #endif
126 #ifdef COMPILE_KEY
127 #define MOD_STRING "(Compiled subkeys)" TAB_STR
128 #else
129 #define MOD_STRING "(Full keying)" TAB_STR
130 #endif
131 /* Fe32_ does a full S-box + MDS lookup. Need to #define _sBox_ before use.
132 Note that we "interleave" 0,1, and 2,3 to avoid cache bank collisions
133 in optimized assembly language.
134 */
/* layout: boxes 0 and 1 share _sBox_[0] (even/odd entries), boxes 2 and 3
   share _sBox_[2]; sbSet below writes with the matching index arithmetic */
135 #define Fe32_(x,R) (_sBox_[0][2*_b(x,R )] ^ _sBox_[0][2*_b(x,R+1)+1] ^ \
136 _sBox_[2][2*_b(x,R+2)] ^ _sBox_[2][2*_b(x,R+3)+1])
137 /* set a single S-box value, given the input byte */
138 #define sbSet(N,i,J,v) { _sBox_[N&2][2*i+(N&1)+2*J]=MDStab[N][v]; }
139 #define GetSboxKey
140 #endif
141
142 /* macro(s) for debugging help */
#define CHECK_TABLE 0 /* nonzero --> compare against "slow" table */
#define VALIDATE_PARMS 0 /* disable for full speed */

/* end of debug macros */

#ifdef GetCodeSize
/* Here() is not defined in this file, so it needs external linkage.  A
   declaration may carry only one storage-class specifier, which is why the
   former "static extern" spelling is reduced to plain "extern". */
extern DWORD Here(DWORD x); /* return caller's address! */

/* Returns whatever Here(0) reports when called from this function; used as
   the start marker when GetCodeSize is defined. */
static DWORD TwofishCodeStart(void) { return Here(0); }
#endif
152
153 /*
154 +*****************************************************************************
155 *
156 * Function Name: TableOp
157 *
158 * Function: Handle table use checking
159 *
160 * Arguments: op = what to do (see TAB_* defns in AES.H)
161 *
162 * Return: TRUE --> done (for TAB_QUERY)
163 *
164 * Notes: This routine is for use in generating the tables KAT file.
165 * For this optimized version, we don't actually track table usage,
166 * since it would make the macros incredibly ugly. Instead we just
167 * run for a fixed number of queries and then say we're done.
168 *
169 -****************************************************************************/
170 static int TableOp(int op)
171 {
172 static int queryCnt=0;
173
174 switch (op)
175 {
176 case TAB_DISABLE:
177 break;
178 case TAB_ENABLE:
179 break;
180 case TAB_RESET:
181 queryCnt=0;
182 break;
183 case TAB_QUERY:
184 queryCnt++;
185 if (queryCnt < TAB_MIN_QUERY)
186 return FALSE;
187 }
188 return TRUE;
189 }
190
191
192 #if CHECK_TABLE
193 /*
194 +*****************************************************************************
195 *
196 * Function Name: f32
197 *
198 * Function: Run four bytes through keyed S-boxes and apply MDS matrix
199 *
200 * Arguments: x = input to f function
201 * k32 = pointer to key dwords
202 * keyLen = total key length (k32 --> keyLen/2 bits)
203 *
204 * Return: The output of the keyed permutation applied to x.
205 *
206 * Notes:
207 * This function is a keyed 32-bit permutation. It is the major building
208 * block for the Twofish round function, including the four keyed 8x8
209 * permutations and the 4x4 MDS matrix multiply. This function is used
210 * both for generating round subkeys and within the round function on the
211 * block being encrypted.
212 *
213 * This version is fairly slow and pedagogical, although a smartcard would
214 * probably perform the operation exactly this way in firmware. For
215 * ultimate performance, the entire operation can be completed with four
216 * lookups into four 256x32-bit tables, with three dword xors.
217 *
218 * The MDS matrix is defined in TABLE.H. To multiply by Mij, just use the
219 * macro Mij(x).
220 *
221 -****************************************************************************/
222 static DWORD f32(DWORD x,CONST DWORD *k32,int keyLen)
223 {
224 BYTE b[4];
225
226 /* Run each byte thru 8x8 S-boxes, xoring with key byte at each stage. */
227 /* Note that each byte goes through a different combination of S-boxes.*/
228
229 *((DWORD *)b) = Bswap(x); /* make b[0] = LSB, b[3] = MSB */
230 switch (((keyLen + 63)/64) & 3)
231 {
232 case 0: /* 256 bits of key */
233 b[0] = p8(04)[b[0]] ^ b0(k32[3]);
234 b[1] = p8(14)[b[1]] ^ b1(k32[3]);
235 b[2] = p8(24)[b[2]] ^ b2(k32[3]);
236 b[3] = p8(34)[b[3]] ^ b3(k32[3]);
237 /* fall thru, having pre-processed b[0]..b[3] with k32[3] */
238 case 3: /* 192 bits of key */
239 b[0] = p8(03)[b[0]] ^ b0(k32[2]);
240 b[1] = p8(13)[b[1]] ^ b1(k32[2]);
241 b[2] = p8(23)[b[2]] ^ b2(k32[2]);
242 b[3] = p8(33)[b[3]] ^ b3(k32[2]);
243 /* fall thru, having pre-processed b[0]..b[3] with k32[2] */
244 case 2: /* 128 bits of key */
245 b[0] = p8(00)[p8(01)[p8(02)[b[0]] ^ b0(k32[1])] ^ b0(k32[0])];
246 b[1] = p8(10)[p8(11)[p8(12)[b[1]] ^ b1(k32[1])] ^ b1(k32[0])];
247 b[2] = p8(20)[p8(21)[p8(22)[b[2]] ^ b2(k32[1])] ^ b2(k32[0])];
248 b[3] = p8(30)[p8(31)[p8(32)[b[3]] ^ b3(k32[1])] ^ b3(k32[0])];
249 }
250
251 /* Now perform the MDS matrix multiply inline. */
252 return ((M00(b[0]) ^ M01(b[1]) ^ M02(b[2]) ^ M03(b[3])) ) ^
253 ((M10(b[0]) ^ M11(b[1]) ^ M12(b[2]) ^ M13(b[3])) << 8) ^
254 ((M20(b[0]) ^ M21(b[1]) ^ M22(b[2]) ^ M23(b[3])) << 16) ^
255 ((M30(b[0]) ^ M31(b[1]) ^ M32(b[2]) ^ M33(b[3])) << 24) ;
256 }
257 #endif /* CHECK_TABLE */
258
259
260 /*
261 +*****************************************************************************
262 *
263 * Function Name: RS_MDS_Encode
264 *
265 * Function: Use (12,8) Reed-Solomon code over GF(256) to produce
266 * a key S-box dword from two key material dwords.
267 *
268 * Arguments: k0 = 1st dword
269 * k1 = 2nd dword
270 *
271 * Return: Remainder polynomial generated using RS code
272 *
273 * Notes:
274 * Since this computation is done only once per reKey per 64 bits of key,
275 * the performance impact of this routine is imperceptible. The RS code
276 * chosen has "simple" coefficients to allow smartcard/hardware implementation
277 * without lookup tables.
278 *
279 -****************************************************************************/
280 static DWORD RS_MDS_Encode(DWORD k0,DWORD k1)
281 {
282 int i,j;
283 DWORD r;
284
285 for (i=r=0;i<2;i++)
286 {
287 r ^= (i) ? k0 : k1; /* merge in 32 more key bits */
288 for (j=0;j<4;j++) /* shift one byte at a time */
289 RS_rem(r);
290 }
291 return r;
292 }
293
294
295 /*
296 +*****************************************************************************
297 *
298 * Function Name: BuildMDS
299 *
300 * Function: Initialize the MDStab array
301 *
302 * Arguments: None.
303 *
304 * Return: None.
305 *
306 * Notes:
307 * Here we precompute all the fixed MDS table. This only needs to be done
308 * one time at initialization, after which the table is "CONST".
309 *
310 -****************************************************************************/
/* Fill the file-scope MDStab[0..3][0..255] table (and bigTab when BIG_TAB is
   enabled), then clear needToBuildMDS so reKey does not call this again.
   No locking is visible here.
   NOTE(review): presumably callers must serialize the first reKey - confirm.
   The function temporarily redefines the Mul_1/Mul_X/Mul_Y macros, so the
   order of the preprocessor lines inside the body matters. */
311 static void BuildMDS(void)
312 {
313 int i;
314 DWORD d;
315 BYTE m1[2],mX[2],mY[4];
316
317 for (i=0;i<256;i++)
318 {
/* products of the two fixed permutations P8x8[0][i] and P8x8[1][i]:
   m1 = the value itself, mX / mY = results of Mul_X / Mul_Y */
319 m1[0]=P8x8[0][i]; /* compute all the matrix elements */
320 mX[0]=(BYTE) Mul_X(m1[0]);
321 mY[0]=(BYTE) Mul_Y(m1[0]);
322
323 m1[1]=P8x8[1][i];
324 mX[1]=(BYTE) Mul_X(m1[1]);
325 mY[1]=(BYTE) Mul_Y(m1[1]);
326
/* From here on the Mij macros used by SetMDS index the small arrays above
   instead of multiplying (assumes Mij expands to Mul_1/Mul_X/Mul_Y as the
   original comment states - defined outside this file). */
327 #undef Mul_1 /* change what the pre-processor does with Mij */
328 #undef Mul_X
329 #undef Mul_Y
330 #define Mul_1 m1 /* It will now access m01[], m5B[], and mEF[] */
331 #define Mul_X mX
332 #define Mul_Y mY
333
/* SetMDS(N): assemble the four bytes of entry i of table N in d, then store
   it.  All four bytes of d are written before d is read. */
334 #define SetMDS(N) \
335 b0(d) = M0##N[P_##N##0]; \
336 b1(d) = M1##N[P_##N##0]; \
337 b2(d) = M2##N[P_##N##0]; \
338 b3(d) = M3##N[P_##N##0]; \
339 MDStab[N][i] = d;
340
341 SetMDS(0); /* fill in the matrix with elements computed above */
342 SetMDS(1);
343 SetMDS(2);
344 SetMDS(3);
345 }
/* restore the multiply macros for the rest of the file */
346 #undef Mul_1
347 #undef Mul_X
348 #undef Mul_Y
349 #define Mul_1 Mx_1 /* re-enable true multiply */
350 #define Mul_X Mx_X
351 #define Mul_Y Mx_Y
352
353 #if BIG_TAB
/* bigTab[i][j][k] = q0[q1[k]^j], where q0/q1 are the p8(i1)/p8(i2)
   permutations for byte position i and j is a key byte */
354 {
355 int j,k;
356 BYTE *q0,*q1;
357
358 for (i=0;i<4;i++)
359 {
360 switch (i)
361 {
362 case 0: q0=p8(01); q1=p8(02); break;
363 case 1: q0=p8(11); q1=p8(12); break;
364 case 2: q0=p8(21); q1=p8(22); break;
365 case 3: q0=p8(31); q1=p8(32); break;
366 }
367 for (j=0;j<256;j++)
368 for (k=0;k<256;k++)
369 bigTab[i][j][k]=q0[q1[k]^j];
370 }
371 }
372 #endif
373
374 needToBuildMDS=0; /* NEVER modify the table again! */
375 }
376
377 /*
378 +*****************************************************************************
379 *
380 * Function Name: ReverseRoundSubkeys
381 *
382 * Function: Reverse order of round subkeys to switch between encrypt/decrypt
383 *
384 * Arguments: key = ptr to keyInstance to be reversed
385 * newDir = new direction value
386 *
387 * Return: None.
388 *
389 * Notes:
390 * This optimization allows both blockEncrypt and blockDecrypt to use the same
391 * "fallthru" switch statement based on the number of rounds.
392 * Note that key->numRounds must be even and >= 2 here.
393 *
394 -****************************************************************************/
395 static void ReverseRoundSubkeys(keyInstance *key,BYTE newDir)
396 {
397 DWORD t0,t1;
398 register DWORD *r0=key->subKeys+ROUND_SUBKEYS;
399 register DWORD *r1=r0 + 2*key->numRounds - 2;
400
401 for (;r0 < r1;r0+=2,r1-=2)
402 {
403 t0=r0[0]; /* swap the order */
404 t1=r0[1];
405 r0[0]=r1[0]; /* but keep relative order within pairs */
406 r0[1]=r1[1];
407 r1[0]=t0;
408 r1[1]=t1;
409 }
410
411 key->direction=newDir;
412 }
413
414 /*
415 +*****************************************************************************
416 *
417 * Function Name: Xor256
418 *
419 * Function: Copy an 8-bit permutation (256 bytes), xoring with a byte
420 *
421 * Arguments: dst = where to put result
422 * src = where to get data (can be same as dst)
423 * b = byte to xor
424 *
425 * Return: None
426 *
427 * Notes:
428 * BorlandC's optimization is terrible! When we put the code inline,
429 * it generates fairly good code in the *following* segment (not in the Xor256
430 * code itself). If the call is made, the code following the call is awful!
431 * The penalty is nearly 50%! So we take the code size hit for inlining for
432 * Borland, while Microsoft happily works with a call.
433 *
434 -****************************************************************************/
435 #if defined(__BORLANDC__) /* do it inline */
436 #define Xor32(dst,src,i) { ((DWORD *)dst)[i] = ((DWORD *)src)[i] ^ tmpX; }
437 #define Xor256(dst,src,b) \
438 { \
439 register DWORD tmpX=0x01010101u * b;\
440 for (i=0;i<64;i+=4) \
441 { Xor32(dst,src,i ); Xor32(dst,src,i+1); Xor32(dst,src,i+2); Xor32(dst,src,i+3); } \
442 }
443 #else /* do it as a function call */
444 static void Xor256(void *dst,void *src,BYTE b)
445 {
446 register DWORD x=b*0x01010101u; /* replicate byte to all four bytes */
447 register DWORD *d=(DWORD *)dst;
448 register DWORD *s=(DWORD *)src;
449 #define X_8(N) { d[N]=s[N] ^ x; d[N+1]=s[N+1] ^ x; }
450 #define X_32(N) { X_8(N); X_8(N+2); X_8(N+4); X_8(N+6); }
451 X_32(0 ); X_32( 8); X_32(16); X_32(24); /* all inline */
452 d+=32; /* keep offsets small! */
453 s+=32;
454 X_32(0 ); X_32( 8); X_32(16); X_32(24); /* all inline */
455 }
456 #endif
457
458 /*
459 +*****************************************************************************
460 *
461 * Function Name: reKey
462 *
463 * Function: Initialize the Twofish key schedule from key32
464 *
465 * Arguments: key = ptr to keyInstance to be initialized
466 *
467 * Return: TRUE on success
468 *
469 * Notes:
470 * Here we precompute all the round subkeys, although that is not actually
471 * required. For example, on a smartcard, the round subkeys can
472 * be generated on-the-fly using f32()
473 *
474 -****************************************************************************/
/* Build the key schedule for *key.
 *
 * Reads:  key->key32, key->keyLen, key->numRounds, key->direction.
 * Writes: key->sboxKeys, key->subKeys, the key-dependent S-box (_sBox_)
 *         unless ZERO_KEY is defined, and - on first use - the shared MDStab
 *         via BuildMDS().
 * Returns TRUE; with VALIDATE_PARMS and ALIGN32 both enabled it can instead
 *         return BAD_ALIGN32 or BAD_KEY_INSTANCE.
 *
 * The S-box fill is selected by switch (keyLen) with cases 128, 192 and 256
 * only; any other key length leaves the S-box as it was.
 */
475 static int reKey(keyInstance *key)
476 {
477 int i,j,k64Cnt,keyLen;
478 int subkeyCnt;
479 DWORD A=0,B=0,q;
480 DWORD sKey[MAX_KEY_BITS/64],k32e[MAX_KEY_BITS/64],k32o[MAX_KEY_BITS/64];
481 BYTE L0[256],L1[256]; /* small local 8-bit permutations */
482
/* note: the key-length check below is compiled only when ALIGN32 is also set */
483 #if VALIDATE_PARMS
484 #if ALIGN32
485 if (((int)key) & 3)
486 return BAD_ALIGN32;
487 if ((key->keyLen % 64) || (key->keyLen < MIN_KEY_BITS))
488 return BAD_KEY_INSTANCE;
489 #endif
490 #endif
491
492 if (needToBuildMDS) /* do this one time only */
493 BuildMDS();
494
/* F32(res,x,k32): table-driven form of the keyed permutation, used for the
   round subkeys.  The entry point into the fall-through chain is chosen by
   k64Cnt (number of 64-bit key words) modulo 4; a value of 1 matches no case
   and leaves res unchanged. */
495 #define F32(res,x,k32) \
496 { \
497 DWORD t=x; \
498 switch (k64Cnt & 3) \
499 { \
500 case 0: /* same as 4 */ \
501 b0(t) = p8(04)[b0(t)] ^ b0(k32[3]); \
502 b1(t) = p8(14)[b1(t)] ^ b1(k32[3]); \
503 b2(t) = p8(24)[b2(t)] ^ b2(k32[3]); \
504 b3(t) = p8(34)[b3(t)] ^ b3(k32[3]); \
505 /* fall thru, having pre-processed t */ \
506 case 3: b0(t) = p8(03)[b0(t)] ^ b0(k32[2]); \
507 b1(t) = p8(13)[b1(t)] ^ b1(k32[2]); \
508 b2(t) = p8(23)[b2(t)] ^ b2(k32[2]); \
509 b3(t) = p8(33)[b3(t)] ^ b3(k32[2]); \
510 /* fall thru, having pre-processed t */ \
511 case 2: /* 128-bit keys (optimize for this case) */ \
512 res= MDStab[0][p8(01)[p8(02)[b0(t)] ^ b0(k32[1])] ^ b0(k32[0])] ^ \
513 MDStab[1][p8(11)[p8(12)[b1(t)] ^ b1(k32[1])] ^ b1(k32[0])] ^ \
514 MDStab[2][p8(21)[p8(22)[b2(t)] ^ b2(k32[1])] ^ b2(k32[0])] ^ \
515 MDStab[3][p8(31)[p8(32)[b3(t)] ^ b3(k32[1])] ^ b3(k32[0])] ; \
516 } \
517 }
518
519
/* The block below always runs, except when CHECK_TABLE is off, USE_ASM is
   defined and useAsm has bit 2 (value 4) set.  In that case subkeyCnt, keyLen
   and k64Cnt stay uninitialized; the C paths that read them are skipped. */
520 #if !CHECK_TABLE
521 #if defined(USE_ASM) /* only do this if not using assembler */
522 if (!(useAsm & 4))
523 #endif
524 #endif
525 {
526 subkeyCnt = ROUND_SUBKEYS + 2*key->numRounds;
527 keyLen=key->keyLen;
528 k64Cnt=(keyLen+63)/64; /* number of 64-bit key words */
529 for (i=0,j=k64Cnt-1;i<k64Cnt;i++,j--)
530 { /* split into even/odd key dwords */
531 k32e[i]=key->key32[2*i ];
532 k32o[i]=key->key32[2*i+1];
533 /* compute S-box keys using (12,8) Reed-Solomon code over GF(256) */
534 sKey[j]=key->sboxKeys[j]=RS_MDS_Encode(k32e[i],k32o[i]); /* reverse order */
535 }
536 }
537
538 #ifdef USE_ASM
539 if (useAsm & 4)
540 {
541 #if defined(COMPILE_KEY) && defined(USE_ASM)
542 key->keySig = VALID_SIG; /* show that we are initialized */
543 key->codeSize = sizeof(key->compiledCode); /* set size */
544 #endif
/* return value of the ASM key setup is not checked */
545 reKey_86(key);
546 }
547 else
548 #endif
549 {
/* subkeys are produced in pairs: q advances by SK_STEP per pair and the odd
   half uses q+SK_BUMP */
550 for (i=q=0;i<subkeyCnt/2;i++,q+=SK_STEP)
551 { /* compute round subkeys for PHT */
552 F32(A,q ,k32e); /* A uses even key dwords */
553 F32(B,q+SK_BUMP,k32o); /* B uses odd key dwords */
554 B = ROL(B,8);
555 key->subKeys[2*i ] = A+B; /* combine with a PHT */
556 B = A + 2*B;
557 key->subKeys[2*i+1] = ROL(B,SK_ROTL);
558 }
559 #if !defined(ZERO_KEY)
/* The sbNNN(N) macros below fill S-box N for one key length.  They use the
   locals i, L0, L1 and sKey of this function and store through sbSet, whose
   definition depends on the keying mode selected at the top of the file. */
560 switch (keyLen) /* case out key length for speed in generating S-boxes */
561 {
562 case 128:
563 #if defined(FULL_KEY) || defined(PART_KEY)
564 #if BIG_TAB
565 #define one128(N,J) sbSet(N,i,J,L0[i+J])
566 #define sb128(N) { \
567 BYTE *qq=bigTab[N][b##N(sKey[1])]; \
568 Xor256(L0,qq,b##N(sKey[0])); \
569 for (i=0;i<256;i+=2) { one128(N,0); one128(N,1); } }
570 #else
571 #define one128(N,J) sbSet(N,i,J,p8(N##1)[L0[i+J]]^k0)
572 #define sb128(N) { \
573 Xor256(L0,p8(N##2),b##N(sKey[1])); \
574 { register DWORD k0=b##N(sKey[0]); \
575 for (i=0;i<256;i+=2) { one128(N,0); one128(N,1); } } }
576 #endif
577 #elif defined(MIN_KEY)
578 #define sb128(N) Xor256(_sBox8_(N),p8(N##2),b##N(sKey[1]))
579 #endif
580 sb128(0); sb128(1); sb128(2); sb128(3);
581 break;
582 case 192:
583 #if defined(FULL_KEY) || defined(PART_KEY)
584 #define one192(N,J) sbSet(N,i,J,p8(N##1)[p8(N##2)[L0[i+J]]^k1]^k0)
585 #define sb192(N) { \
586 Xor256(L0,p8(N##3),b##N(sKey[2])); \
587 { register DWORD k0=b##N(sKey[0]); \
588 register DWORD k1=b##N(sKey[1]); \
589 for (i=0;i<256;i+=2) { one192(N,0); one192(N,1); } } }
590 #elif defined(MIN_KEY)
591 #define one192(N,J) sbSet(N,i,J,p8(N##2)[L0[i+J]]^k1)
592 #define sb192(N) { \
593 Xor256(L0,p8(N##3),b##N(sKey[2])); \
594 { register DWORD k1=b##N(sKey[1]); \
595 for (i=0;i<256;i+=2) { one192(N,0); one192(N,1); } } }
596 #endif
597 sb192(0); sb192(1); sb192(2); sb192(3);
598 break;
599 case 256:
/* 256-bit keys need two scratch permutations: L1 holds the first keyed stage,
   L0 the second; the second Xor256 works in place (dst == src) */
600 #if defined(FULL_KEY) || defined(PART_KEY)
601 #define one256(N,J) sbSet(N,i,J,p8(N##1)[p8(N##2)[L0[i+J]]^k1]^k0)
602 #define sb256(N) { \
603 Xor256(L1,p8(N##4),b##N(sKey[3])); \
604 for (i=0;i<256;i+=2) {L0[i ]=p8(N##3)[L1[i]]; \
605 L0[i+1]=p8(N##3)[L1[i+1]]; } \
606 Xor256(L0,L0,b##N(sKey[2])); \
607 { register DWORD k0=b##N(sKey[0]); \
608 register DWORD k1=b##N(sKey[1]); \
609 for (i=0;i<256;i+=2) { one256(N,0); one256(N,1); } } }
610 #elif defined(MIN_KEY)
611 #define one256(N,J) sbSet(N,i,J,p8(N##2)[L0[i+J]]^k1)
612 #define sb256(N) { \
613 Xor256(L1,p8(N##4),b##N(sKey[3])); \
614 for (i=0;i<256;i+=2) {L0[i ]=p8(N##3)[L1[i]]; \
615 L0[i+1]=p8(N##3)[L1[i+1]]; } \
616 Xor256(L0,L0,b##N(sKey[2])); \
617 { register DWORD k1=b##N(sKey[1]); \
618 for (i=0;i<256;i+=2) { one256(N,0); one256(N,1); } } }
619 #endif
620 sb256(0); sb256(1); sb256(2); sb256(3);
621 break;
622 }
623 #endif
624 }
625
/* Optional self-test: recompute every subkey and S-box entry with the slow
   f32() and compare.  Uses assert(), whose header include is commented out
   at the top of this file. */
626 #if CHECK_TABLE /* sanity check vs. pedagogical code*/
627 {
628 GetSboxKey;
629 for (i=0;i<subkeyCnt/2;i++)
630 {
631 A = f32(i*SK_STEP ,k32e,keyLen); /* A uses even key dwords */
632 B = f32(i*SK_STEP+SK_BUMP,k32o,keyLen); /* B uses odd key dwords */
633 B = ROL(B,8);
634 assert(key->subKeys[2*i ] == A+ B);
635 assert(key->subKeys[2*i+1] == ROL(A+2*B,SK_ROTL));
636 }
637 #if !defined(ZERO_KEY) /* any S-boxes to check? */
638 for (i=q=0;i<256;i++,q+=0x01010101)
639 assert(f32(q,key->sboxKeys,keyLen) == Fe32_(q,0));
640 #endif
641 }
642 #endif /* CHECK_TABLE */
643
/* keys set up for encryption are stored with the round subkeys reversed;
   blockEncrypt/blockDecrypt flip them again when the direction changes */
644 if (key->direction == DIR_ENCRYPT)
645 ReverseRoundSubkeys(key,DIR_ENCRYPT); /* reverse the round subkey order */
646
647 return TRUE;
648 }
649 /*
650 +*****************************************************************************
651 *
652 * Function Name: makeKey
653 *
654 * Function: Initialize the Twofish key schedule
655 *
656 * Arguments: key = ptr to keyInstance to be initialized
657 * direction = DIR_ENCRYPT or DIR_DECRYPT
658 * keyLen = # bits of key text at *keyMaterial
659 * keyMaterial = ptr to keyLen/8 raw key bytes (not hex ASCII); may be NULL
660 *
661 * Return: TRUE on success
662 * else error code (e.g., BAD_KEY_DIR)
663 *
664 * Notes: This parses the key bits from keyMaterial. Zeroes out unused key bits
665 *
666 -****************************************************************************/
667 static int makeKey(keyInstance *key, BYTE direction, int keyLen,CONST char *keyMaterial)
668 {
669 int i;
670
671 #if VALIDATE_PARMS /* first, sanity check on parameters */
672 if (key == NULL)
673 return BAD_KEY_INSTANCE;/* must have a keyInstance to initialize */
674 if ((direction != DIR_ENCRYPT) && (direction != DIR_DECRYPT))
675 return BAD_KEY_DIR; /* must have valid direction */
676 if ((keyLen > MAX_KEY_BITS) || (keyLen < 8) || (keyLen & 0x3F))
677 return BAD_KEY_MAT; /* length must be valid */
678 key->keySig = VALID_SIG; /* show that we are initialized */
679 #if ALIGN32
680 if ((((int)key) & 3) || (((int)key->key32) & 3))
681 return BAD_ALIGN32;
682 #endif
683 #endif
684
685 key->direction = direction;/* set our cipher direction */
686 key->keyLen = (keyLen+63) & ~63; /* round up to multiple of 64 */
687 key->numRounds = numRounds[(keyLen-1)/64];
688 memset(key->key32,0,sizeof(key->key32)); /* zero unused bits */
689
690 if (keyMaterial == NULL)
691 return TRUE; /* allow a "dummy" call */
692
693 for (i=0;i<keyLen/32;i++) /* make byte-oriented copy for CFB1 */
694 key->key32[i] = (((unsigned char *)keyMaterial)[i*4+0] << 0)
695 | (((unsigned char *)keyMaterial)[i*4+1] << 8)
696 | (((unsigned char *)keyMaterial)[i*4+2] << 16)
697 | (((unsigned char *)keyMaterial)[i*4+3] << 24);
698
699 return reKey(key); /* generate round subkeys */
700 }
701
702
703 /*
704 +*****************************************************************************
705 *
706 * Function Name: cipherInit
707 *
708 * Function: Initialize the Twofish cipher in a given mode
709 *
710 * Arguments: cipher = ptr to cipherInstance to be initialized
711 * mode = MODE_ECB, MODE_CBC, or MODE_CFB1
712 * IV = ptr to raw IV bytes, not hex ASCII (ignored for MODE_ECB or when NULL)
713 *
714 * Return: TRUE on success
715 * else error code (e.g., BAD_CIPHER_MODE)
716 *
717 -****************************************************************************/
718 static int cipherInit(cipherInstance *cipher, BYTE mode,CONST char *IV)
719 {
720 int i;
721 #if VALIDATE_PARMS /* first, sanity check on parameters */
722 if (cipher == NULL)
723 return BAD_PARAMS; /* must have a cipherInstance to initialize */
724 if ((mode != MODE_ECB) && (mode != MODE_CBC) && (mode != MODE_CFB1))
725 return BAD_CIPHER_MODE; /* must have valid cipher mode */
726 cipher->cipherSig = VALID_SIG;
727 #if ALIGN32
728 if ((((int)cipher) & 3) || (((int)cipher->IV) & 3) || (((int)cipher->iv32) & 3))
729 return BAD_ALIGN32;
730 #endif
731 #endif
732
733 if ((mode != MODE_ECB) && (IV)) /* parse the IV */
734 {
735 memcpy (cipher->iv32, IV, BLOCK_SIZE/32);
736 for (i=0;i<BLOCK_SIZE/32;i++) /* make byte-oriented copy for CFB1 */
737 ((DWORD *)cipher->IV)[i] = Bswap(cipher->iv32[i]);
738 }
739
740 cipher->mode = mode;
741
742 return TRUE;
743 }
744
745 /*
746 +*****************************************************************************
747 *
748 * Function Name: blockEncrypt
749 *
750 * Function: Encrypt block(s) of data using Twofish
751 *
752 * Arguments: cipher = ptr to already initialized cipherInstance
753 * key = ptr to already initialized keyInstance
754 * input = ptr to data blocks to be encrypted
755 * inputLen = # bits to encrypt (multiple of blockSize)
756 * outBuffer = ptr to where to put encrypted blocks
757 *
758 * Return: # bits ciphered (>= 0)
759 * else error code (e.g., BAD_CIPHER_STATE, BAD_KEY_MATERIAL)
760 *
761 * Notes: The only supported block size for ECB/CBC modes is BLOCK_SIZE bits.
762 * If inputLen is not a multiple of BLOCK_SIZE bits in those modes,
763 * an error BAD_INPUT_LEN is returned. In CFB1 mode, all block
764 * sizes can be supported.
765 *
766 -****************************************************************************/
/* Encrypt inputLen bits from input into outBuffer.
 *
 * MODE_CFB1: works one bit at a time.  cipher->mode is switched to MODE_ECB
 *   while this function calls itself to encrypt cipher->IV, and is set back
 *   to MODE_CFB1 afterwards; cipher->IV is shifted left by one bit per step
 *   with the new ciphertext bit shifted in.
 * MODE_ECB / MODE_CBC: works on whole blocks of BLOCK_SIZE bits.  For CBC the
 *   chaining value is read from cipher->iv32 and written back at the end.
 *
 * Side effects on *key: if key->direction is not DIR_ENCRYPT the round
 * subkeys are reversed in place (ReverseRoundSubkeys).
 *
 * Returns inputLen, or an error code from the VALIDATE_PARMS checks when
 * those are compiled in.  Note that key and cipher are dereferenced in the
 * initializers below, before those checks run.
 */
767 static int blockEncrypt(cipherInstance *cipher, keyInstance *key,CONST BYTE *input,
768 int inputLen, BYTE *outBuffer)
769 {
770 int i,n; /* loop counters */
771 DWORD x[BLOCK_SIZE/32]; /* block being encrypted */
772 DWORD t0,t1; /* temp variables */
773 int rounds=key->numRounds; /* number of rounds */
774 BYTE bit,bit0,ctBit,carry; /* temps for CFB */
775
776 /* make local copies of things for faster access */
777 int mode = cipher->mode;
778 DWORD sk[TOTAL_SUBKEYS];
779 DWORD IV[BLOCK_SIZE/32];
780
/* mode-dependent local declarations needed by the Fe32 macros (may be empty) */
781 GetSboxKey;
782
783 #if VALIDATE_PARMS
784 if ((cipher == NULL) || (cipher->cipherSig != VALID_SIG))
785 return BAD_CIPHER_STATE;
786 if ((key == NULL) || (key->keySig != VALID_SIG))
787 return BAD_KEY_INSTANCE;
788 if ((rounds < 2) || (rounds > MAX_ROUNDS) || (rounds&1))
789 return BAD_KEY_INSTANCE;
790 if ((mode != MODE_CFB1) && (inputLen % BLOCK_SIZE))
791 return BAD_INPUT_LEN;
792 #if ALIGN32
793 if ( (((int)cipher) & 3) || (((int)key ) & 3) ||
794 (((int)input ) & 3) || (((int)outBuffer) & 3))
795 return BAD_ALIGN32;
796 #endif
797 #endif
798
799 if (mode == MODE_CFB1)
800 { /* use recursion here to handle CFB, one block at a time */
801 cipher->mode = MODE_ECB; /* do encryption in ECB */
802 for (n=0;n<inputLen;n++)
803 {
/* x receives the ECB encryption of the current shift register; only the top
   bit of its first byte is used as keystream */
804 blockEncrypt(cipher,key,cipher->IV,BLOCK_SIZE,(BYTE *)x);
805 bit0 = 0x80 >> (n & 7);/* which bit position in byte */
806 ctBit = (input[n/8] & bit0) ^ ((((BYTE *) x)[0] & 0x80) >> (n&7));
/* only bit n of the output is replaced; the other bits of the byte are kept */
807 outBuffer[n/8] = (outBuffer[n/8] & ~ bit0) | ctBit;
808 carry = ctBit >> (7 - (n&7));
/* shift the whole register left by one bit, last byte first, feeding the
   ciphertext bit into the lowest position */
809 for (i=BLOCK_SIZE/8-1;i>=0;i--)
810 {
811 bit = cipher->IV[i] >> 7; /* save next "carry" from shift */
812 cipher->IV[i] = (cipher->IV[i] << 1) ^ carry;
813 carry = bit;
814 }
815 }
816 cipher->mode = MODE_CFB1; /* restore mode for next time */
817 return inputLen;
818 }
819
820 /* here for ECB, CBC modes */
821 if (key->direction != DIR_ENCRYPT)
822 ReverseRoundSubkeys(key,DIR_ENCRYPT); /* reverse the round subkey order */
823
/* optional hand-off to the assembly implementation (useAsm bit 0) */
824 #ifdef USE_ASM
825 if ((useAsm & 1) && (inputLen))
826 #ifdef COMPILE_KEY
827 if (key->keySig == VALID_SIG)
828 return ((CipherProc *)(key->encryptFuncPtr))(cipher,key,input,inputLen,outBuffer);
829 #else
830 return (*blockEncrypt_86)(cipher,key,input,inputLen,outBuffer);
831 #endif
832 #endif
833 /* make local copy of subkeys for speed */
834 memcpy(sk,key->subKeys,sizeof(DWORD)*(ROUND_SUBKEYS+2*rounds));
/* NOTE(review): no ';' after BlockCopy below - presumably the macro expands
   to a braced block so that the following "else" still parses; confirm in
   the header that defines it. */
835 if (mode == MODE_CBC)
836 BlockCopy(IV,cipher->iv32)
837 else
838 IV[0]=IV[1]=IV[2]=IV[3]=0;
839
/* one iteration per block; input and outBuffer advance by BLOCK_SIZE/8 bytes */
840 for (n=0;n<inputLen;n+=BLOCK_SIZE,input+=BLOCK_SIZE/8,outBuffer+=BLOCK_SIZE/8)
841 {
/* load with input whitening and (for CBC) the chaining value; IV is all
   zero in ECB mode so the xor is then a no-op */
842 #define LoadBlockE(N) x[N]=Bswap(((DWORD *)input)[N]) ^ sk[INPUT_WHITEN+N] ^ IV[N]
843 LoadBlockE(0); LoadBlockE(1); LoadBlockE(2); LoadBlockE(3);
/* EncryptRound(K,R,id): one round on the half starting at x[K], using round
   subkey pair R; "id" is pasted onto Fe32 to pick the lookup macro */
844 #define EncryptRound(K,R,id) \
845 t0 = Fe32##id(x[K ],0); \
846 t1 = Fe32##id(x[K^1],3); \
847 x[K^3] = ROL(x[K^3],1); \
848 x[K^2]^= t0 + t1 + sk[ROUND_SUBKEYS+2*(R) ]; \
849 x[K^3]^= t0 + 2*t1 + sk[ROUND_SUBKEYS+2*(R)+1]; \
850 x[K^2] = ROR(x[K^2],1);
851 #define Encrypt2(R,id) { EncryptRound(0,R+1,id); EncryptRound(2,R,id); }
852
853 #if defined(ZERO_KEY)
854 switch (key->keyLen)
855 {
856 case 128:
857 for (i=rounds-2;i>=0;i-=2)
858 Encrypt2(i,_128);
859 break;
860 case 192:
861 for (i=rounds-2;i>=0;i-=2)
862 Encrypt2(i,_192);
863 break;
864 case 256:
865 for (i=rounds-2;i>=0;i-=2)
866 Encrypt2(i,_256);
867 break;
868 }
869 #else
/* fully unrolled: always 16 rounds here, independent of "rounds" */
870 Encrypt2(14,_);
871 Encrypt2(12,_);
872 Encrypt2(10,_);
873 Encrypt2( 8,_);
874 Encrypt2( 6,_);
875 Encrypt2( 4,_);
876 Encrypt2( 2,_);
877 Encrypt2( 0,_);
878 #endif
879
880 /* need to do (or undo, depending on your point of view) final swap */
881 #if LittleEndian
882 #define StoreBlockE(N) ((DWORD *)outBuffer)[N]=x[N^2] ^ sk[OUTPUT_WHITEN+N]
883 #else
884 #define StoreBlockE(N) { t0=x[N^2] ^ sk[OUTPUT_WHITEN+N]; ((DWORD *)outBuffer)[N]=Bswap(t0); }
885 #endif
886 StoreBlockE(0); StoreBlockE(1); StoreBlockE(2); StoreBlockE(3);
/* CBC: the ciphertext just written becomes the chaining value for the next block */
887 if (mode == MODE_CBC)
888 {
889 IV[0]=Bswap(((DWORD *)outBuffer)[0]);
890 IV[1]=Bswap(((DWORD *)outBuffer)[1]);
891 IV[2]=Bswap(((DWORD *)outBuffer)[2]);
892 IV[3]=Bswap(((DWORD *)outBuffer)[3]);
893 }
894 }
895
/* persist the chaining value so a later call continues the CBC stream */
896 if (mode == MODE_CBC)
897 BlockCopy(cipher->iv32,IV);
898
899 return inputLen;
900 }
901
902 /*
903 +*****************************************************************************
904 *
905 * Function Name: blockDecrypt
906 *
907 * Function: Decrypt block(s) of data using Twofish
908 *
909 * Arguments: cipher = ptr to already initialized cipherInstance
910 * key = ptr to already initialized keyInstance
911 * input = ptr to data blocks to be decrypted
912 * inputLen = # bits to decrypt (multiple of blockSize)
913 * outBuffer = ptr to where to put decrypted blocks
914 *
915 * Return: # bits ciphered (>= 0)
916 * else error code (e.g., BAD_CIPHER_STATE, BAD_KEY_MATERIAL)
917 *
918 * Notes: The only supported block size for ECB/CBC modes is BLOCK_SIZE bits.
919 * If inputLen is not a multiple of BLOCK_SIZE bits in those modes,
920 * an error BAD_INPUT_LEN is returned. In CFB1 mode, all block
921 * sizes can be supported.
922 *
923 -****************************************************************************/
924 static int blockDecrypt(cipherInstance *cipher, keyInstance *key,CONST BYTE *input,
925 int inputLen, BYTE *outBuffer)
926 { /* decrypt inputLen bits from input into outBuffer: CFB1 bit-by-bit, ECB/CBC block-by-block */
927 int i,n; /* loop counters */
928 DWORD x[BLOCK_SIZE/32]; /* working block being decrypted (in CFB1: receives the encrypted IV) */
929 DWORD t0,t1; /* temp variables */
930 int rounds=key->numRounds; /* number of rounds; NOTE(review): key is dereferenced here, before the NULL check below */
931 BYTE bit,bit0,ctBit,carry; /* temps for CFB */
932
933 /* make local copies of things for faster access */
934 int mode = cipher->mode; /* NOTE(review): cipher is dereferenced here, before the NULL check below */
935 DWORD sk[TOTAL_SUBKEYS]; /* local copy of the subkeys, filled by memcpy below */
936 DWORD IV[BLOCK_SIZE/32]; /* CBC chaining value (all zero when mode is not CBC) */
937
938 GetSboxKey; /* macro defined elsewhere in this file; its expansion is not visible here */
939
940 #if VALIDATE_PARMS
941 if ((cipher == NULL) || (cipher->cipherSig != VALID_SIG))
942 return BAD_CIPHER_STATE;
943 if ((key == NULL) || (key->keySig != VALID_SIG))
944 return BAD_KEY_INSTANCE;
945 if ((rounds < 2) || (rounds > MAX_ROUNDS) || (rounds&1)) /* rounds must be even and within [2, MAX_ROUNDS] */
946 return BAD_KEY_INSTANCE;
947 if ((cipher->mode != MODE_CFB1) && (inputLen % BLOCK_SIZE)) /* only CFB1 accepts a length that is not a whole number of blocks */
948 return BAD_INPUT_LEN;
949 #if ALIGN32
950 if ( (((int)cipher) & 3) || (((int)key ) & 3) || /* reject any of the four pointers that is not 4-byte aligned; */
951 (((int)input) & 3) || (((int)outBuffer) & 3)) /* only the low two bits of each cast value are tested */
952 return BAD_ALIGN32;
953 #endif
954 #endif
955
956 if (cipher->mode == MODE_CFB1)
957 { /* use blockEncrypt here to handle CFB, one block at a time */
958 cipher->mode = MODE_ECB; /* do encryption in ECB */
959 for (n=0;n<inputLen;n++) /* n indexes individual bits of input/outBuffer */
960 {
961 blockEncrypt(cipher,key,cipher->IV,BLOCK_SIZE,(BYTE *)x); /* x = ECB encryption of the current IV; NOTE(review): return value is not checked */
962 bit0 = 0x80 >> (n & 7); /* mask for bit n within its byte, most significant bit first */
963 ctBit = input[n/8] & bit0; /* the ciphertext bit, still in its byte position */
964 outBuffer[n/8] = (outBuffer[n/8] & ~ bit0) | /* replace only that bit of the output byte with */
965 (ctBit ^ ((((BYTE *) x)[0] & 0x80) >> (n&7))); /* ctBit XOR the top bit of the first encrypted byte */
966 carry = ctBit >> (7 - (n&7)); /* ciphertext bit moved down to bit 0 */
967 for (i=BLOCK_SIZE/8-1;i>=0;i--) /* shift the whole IV left by one bit, last byte first */
968 {
969 bit = cipher->IV[i] >> 7; /* save next "carry" from shift */
970 cipher->IV[i] = (cipher->IV[i] << 1) ^ carry; /* on the first pass this feeds the ciphertext bit into the last byte */
971 carry = bit;
972 }
973 }
974 cipher->mode = MODE_CFB1; /* restore mode for next time */
975 return inputLen;
976 }
977
978 /* here for ECB, CBC modes */
979 if (key->direction != DIR_DECRYPT)
980 ReverseRoundSubkeys(key,DIR_DECRYPT); /* reverse the round subkey order */
981 #ifdef USE_ASM
982 if ((useAsm & 2) && (inputLen)) /* non-empty input goes to the assembly routine when bit 1 of useAsm is set */
983 #ifdef COMPILE_KEY
984 if (key->keySig == VALID_SIG)
985 return ((CipherProc *)(key->decryptFuncPtr))(cipher,key,input,inputLen,outBuffer);
986 #else
987 return (*blockDecrypt_86)(cipher,key,input,inputLen,outBuffer);
988 #endif
989 #endif
990 /* make local copy of subkeys for speed */
991 memcpy(sk,key->subKeys,sizeof(DWORD)*(ROUND_SUBKEYS+2*rounds));
992 if (mode == MODE_CBC)
993 BlockCopy(IV,cipher->iv32) /* written without ';': BlockCopy (defined elsewhere) presumably expands to a complete statement */
994 else
995 IV[0]=IV[1]=IV[2]=IV[3]=0;
996
997 for (n=0;n<inputLen;n+=BLOCK_SIZE,input+=BLOCK_SIZE/8,outBuffer+=BLOCK_SIZE/8) /* one BLOCK_SIZE-bit block per pass */
998 { /* LoadBlockD: Bswap() of input word N, XORed with the output-whitening subkey, stored at word index N^2 */
999 #define LoadBlockD(N) x[N^2]=Bswap(((DWORD *)input)[N]) ^ sk[OUTPUT_WHITEN+N]
1000 LoadBlockD(0); LoadBlockD(1); LoadBlockD(2); LoadBlockD(3);
1001 /* DecryptRound: t0/t1 come from Fe32 on x[K] and x[K^1]; x[K^2] is rotated left then XORed, x[K^3] is XORed then rotated right */
1002 #define DecryptRound(K,R,id) \
1003 t0 = Fe32##id(x[K ],0); \
1004 t1 = Fe32##id(x[K^1],3); \
1005 x[K^2] = ROL (x[K^2],1); \
1006 x[K^2]^= t0 + t1 + sk[ROUND_SUBKEYS+2*(R) ]; \
1007 x[K^3]^= t0 + 2*t1 + sk[ROUND_SUBKEYS+2*(R)+1]; \
1008 x[K^3] = ROR (x[K^3],1);
1009 /* Decrypt2: two rounds, R+1 (with K=2) first and then R (with K=0) */
1010 #define Decrypt2(R,id) { DecryptRound(2,R+1,id); DecryptRound(0,R,id); }
1011
1012 #if defined(ZERO_KEY)
1013 switch (key->keyLen) /* pick the Fe32 variant whose suffix matches the key length */
1014 {
1015 case 128:
1016 for (i=rounds-2;i>=0;i-=2)
1017 Decrypt2(i,_128);
1018 break;
1019 case 192:
1020 for (i=rounds-2;i>=0;i-=2)
1021 Decrypt2(i,_192);
1022 break;
1023 case 256:
1024 for (i=rounds-2;i>=0;i-=2)
1025 Decrypt2(i,_256);
1026 break;
1027 } /* no default case: any other keyLen runs no rounds */
1028 #else
1029 { /* fully unrolled: always 8 double-rounds (16 rounds); the local 'rounds' is not consulted here */
1030 Decrypt2(14,_);
1031 Decrypt2(12,_);
1032 Decrypt2(10,_);
1033 Decrypt2( 8,_);
1034 Decrypt2( 6,_);
1035 Decrypt2( 4,_);
1036 Decrypt2( 2,_);
1037 Decrypt2( 0,_);
1038 }
1039 #endif
1040 if (cipher->mode == MODE_ECB)
1041 { /* ECB: XOR with the input-whitening subkeys and store the result */
1042 #if LittleEndian
1043 #define StoreBlockD(N) ((DWORD *)outBuffer)[N] = x[N] ^ sk[INPUT_WHITEN+N]
1044 #else
1045 #define StoreBlockD(N) { t0=x[N]^sk[INPUT_WHITEN+N]; ((DWORD *)outBuffer)[N] = Bswap(t0); }
1046 #endif
1047 StoreBlockD(0); StoreBlockD(1); StoreBlockD(2); StoreBlockD(3);
1048 #undef StoreBlockD
1049 continue;
1050 }
1051 else
1052 { /* otherwise: also XOR with IV, then take the next IV from the input word before the output word is written */
1053 #define StoreBlockD(N) x[N] ^= sk[INPUT_WHITEN+N] ^ IV[N]; \
1054 IV[N] = Bswap(((DWORD *)input)[N]); \
1055 ((DWORD *)outBuffer)[N] = Bswap(x[N]);
1056 StoreBlockD(0); StoreBlockD(1); StoreBlockD(2); StoreBlockD(3);
1057 #undef StoreBlockD
1058 }
1059 }
1060 if (mode == MODE_CBC) /* restore iv32 to cipher */
1061 BlockCopy(cipher->iv32,IV)
1062
1063 return inputLen;
1064 }
1065
1066 #ifdef GetCodeSize /* TwofishCodeSize is only compiled when GetCodeSize is defined */
1067 static DWORD TwofishCodeSize(void) /* returns TwofishAsmCodeSize() when the ASM path is enabled, else Here(0) - TwofishCodeStart() */
1068 {
1069 DWORD x= Here(0); /* sampled first; Here() is defined elsewhere -- presumably yields a code address */
1070 #ifdef USE_ASM
1071 if (useAsm & 3) /* either of the two low useAsm bits set: report the ASM module's own size */
1072 return TwofishAsmCodeSize();
1073 #endif
1074 return x - TwofishCodeStart(); /* distance from the value TwofishCodeStart() reports */
1075 } /* no ';' after the body: a stray semicolon at file scope is not valid ISO C */
1076 #endif