1 |
/*************************************************************************** |
2 |
TWOFISH2.C -- Optimized C API calls for TWOFISH AES submission |
3 |
|
4 |
Submitters: |
5 |
Bruce Schneier, Counterpane Systems |
6 |
Doug Whiting, Hi/fn |
7 |
John Kelsey, Counterpane Systems |
8 |
Chris Hall, Counterpane Systems |
9 |
David Wagner, UC Berkeley |
10 |
|
11 |
Code Author: Doug Whiting, Hi/fn |
12 |
|
13 |
Version 1.00 April 1998 |
14 |
|
15 |
Copyright 1998, Hi/fn and Counterpane Systems. All rights reserved. |
16 |
|
17 |
Notes: |
18 |
* Optimized version |
19 |
* Tab size is set to 4 characters in this file |
20 |
|
21 |
***************************************************************************/ |
22 |
#include "aes.h" |
23 |
#include "table.h" |
24 |
|
25 |
#include <memory.h> |
26 |
/*#include <assert.h>*/ |
27 |
|
28 |
/*
 * Accept lower-case spellings of the key-schedule mode switches and map them
 * onto the upper-case names tested further down.  This is a single #if/#elif
 * chain, so only the first branch whose condition holds takes effect.
 */
#if   defined(min_key)  && !defined(MIN_KEY)
#define MIN_KEY     1           /* toupper() */
#elif defined(part_key) && !defined(PART_KEY)
#define PART_KEY    1
#elif defined(zero_key) && !defined(ZERO_KEY)
#define ZERO_KEY    1
#endif
35 |
|
36 |
|
37 |
#ifdef USE_ASM
/*
 * Hooks into the optional assembly-language implementation.
 * useAsm is tested bitwise in this file: (useAsm & 4) selects reKey_86 in
 * reKey(), (useAsm & 1) selects the assembly encrypt path in blockEncrypt().
 */
extern int useAsm;                      /* ok to use ASM code? */

/* signature shared by the assembly block encrypt/decrypt routines */
typedef int cdecl CipherProc
    (cipherInstance *cipher, keyInstance *key,BYTE *input,int inputLen,BYTE *outBuffer);
/* signature of the assembly key-schedule routine */
typedef int cdecl KeySetupProc(keyInstance *key);

extern CipherProc   *blockEncrypt_86;   /* ptr to ASM functions */
extern CipherProc   *blockDecrypt_86;
extern KeySetupProc *reKey_86;
extern DWORD cdecl   TwofishAsmCodeSize(void);
#endif
49 |
|
50 |
/* |
51 |
+***************************************************************************** |
52 |
* Constants/Macros/Tables |
53 |
-****************************************************************************/ |
54 |
|
55 |
#define CONST /* help syntax from C++, NOP here */ |
56 |
|
57 |
static CONST fullSbox MDStab; /* not actually const. Initialized ONE time */ |
58 |
static int needToBuildMDS=1; /* is MDStab initialized yet? */ |
59 |
|
60 |
#define BIG_TAB 0 |
61 |
|
62 |
#if BIG_TAB |
63 |
static BYTE bigTab[4][256][256]; /* pre-computed S-box */ |
64 |
#endif |
65 |
|
66 |
/* number of rounds for various key sizes: 128, 192, 256 */ |
67 |
/* (ignored for now in optimized code!) */ |
68 |
static CONST int numRounds[4]= {0,ROUNDS_128,ROUNDS_192,ROUNDS_256}; |
69 |
|
70 |
#if REENTRANT |
71 |
#define _sBox_ key->sBox8x32 |
72 |
#else |
73 |
static fullSbox _sBox_; /* permuted MDStab based on keys */ |
74 |
#endif |
75 |
#define _sBox8_(N) (((BYTE *) _sBox_) + (N)*256) |
76 |
|
77 |
/*------- see what level of S-box precomputation we need to do -----*/
/*
 * Exactly one of the four branches below is compiled.  Each one supplies:
 *   MOD_STRING  - text describing the keying mode
 *   Fe32_...    - the keyed S-box + MDS lookup used by the round function
 *   sbSet       - store one precomputed S-box entry (not in ZERO_KEY mode)
 *   GetSboxKey  - declaration(s) placed among a function's locals so that the
 *                 Fe32_ macros can see the S-box key material they need
 */
#if   defined(ZERO_KEY)
/* no key-dependent tables: every lookup goes through p8() and SKEY[] */
#define MOD_STRING  "(Zero S-box keying)"
#define Fe32_128(x,R)   \
    (   MDStab[0][p8(01)[p8(02)[_b(x,R  )]^b0(SKEY[1])]^b0(SKEY[0])] ^  \
        MDStab[1][p8(11)[p8(12)[_b(x,R+1)]^b1(SKEY[1])]^b1(SKEY[0])] ^  \
        MDStab[2][p8(21)[p8(22)[_b(x,R+2)]^b2(SKEY[1])]^b2(SKEY[0])] ^  \
        MDStab[3][p8(31)[p8(32)[_b(x,R+3)]^b3(SKEY[1])]^b3(SKEY[0])] )
#define Fe32_192(x,R)   \
    (   MDStab[0][p8(01)[p8(02)[p8(03)[_b(x,R  )]^b0(SKEY[2])]^b0(SKEY[1])]^b0(SKEY[0])] ^ \
        MDStab[1][p8(11)[p8(12)[p8(13)[_b(x,R+1)]^b1(SKEY[2])]^b1(SKEY[1])]^b1(SKEY[0])] ^ \
        MDStab[2][p8(21)[p8(22)[p8(23)[_b(x,R+2)]^b2(SKEY[2])]^b2(SKEY[1])]^b2(SKEY[0])] ^ \
        MDStab[3][p8(31)[p8(32)[p8(33)[_b(x,R+3)]^b3(SKEY[2])]^b3(SKEY[1])]^b3(SKEY[0])] )
#define Fe32_256(x,R)   \
    (   MDStab[0][p8(01)[p8(02)[p8(03)[p8(04)[_b(x,R  )]^b0(SKEY[3])]^b0(SKEY[2])]^b0(SKEY[1])]^b0(SKEY[0])] ^ \
        MDStab[1][p8(11)[p8(12)[p8(13)[p8(14)[_b(x,R+1)]^b1(SKEY[3])]^b1(SKEY[2])]^b1(SKEY[1])]^b1(SKEY[0])] ^ \
        MDStab[2][p8(21)[p8(22)[p8(23)[p8(24)[_b(x,R+2)]^b2(SKEY[3])]^b2(SKEY[2])]^b2(SKEY[1])]^b2(SKEY[0])] ^ \
        MDStab[3][p8(31)[p8(32)[p8(33)[p8(34)[_b(x,R+3)]^b3(SKEY[3])]^b3(SKEY[2])]^b3(SKEY[1])]^b3(SKEY[0])] )

#define GetSboxKey  DWORD SKEY[4];  /* local copy */ \
                    memcpy(SKEY,key->sboxKeys,sizeof(SKEY));
/*----------------------------------------------------------------*/
#elif defined(MIN_KEY)
/* byte tables in _sBox8_() hold the early stages; the last p8() stage and
   the xor with SKEY0 are done at lookup time */
#define MOD_STRING  "(Minimal keying)"
#define Fe32_(x,R)(MDStab[0][p8(01)[_sBox8_(0)[_b(x,R  )]] ^ b0(SKEY0)] ^ \
                   MDStab[1][p8(11)[_sBox8_(1)[_b(x,R+1)]] ^ b1(SKEY0)] ^ \
                   MDStab[2][p8(21)[_sBox8_(2)[_b(x,R+2)]] ^ b2(SKEY0)] ^ \
                   MDStab[3][p8(31)[_sBox8_(3)[_b(x,R+3)]] ^ b3(SKEY0)])
#define sbSet(N,i,J,v) { _sBox8_(N)[i+J] = v; }
#define GetSboxKey  DWORD SKEY0 = key->sboxKeys[0]      /* local copy */
/*----------------------------------------------------------------*/
#elif defined(PART_KEY)
/* byte tables in _sBox8_() feed MDStab directly */
#define MOD_STRING  "(Partial keying)"
#define Fe32_(x,R)(MDStab[0][_sBox8_(0)[_b(x,R  )]] ^ \
                   MDStab[1][_sBox8_(1)[_b(x,R+1)]] ^ \
                   MDStab[2][_sBox8_(2)[_b(x,R+2)]] ^ \
                   MDStab[3][_sBox8_(3)[_b(x,R+3)]])
#define sbSet(N,i,J,v) { _sBox8_(N)[i+J] = v; }
#define GetSboxKey
/*----------------------------------------------------------------*/
#else   /* default is FULL_KEY */
#ifndef FULL_KEY
#define FULL_KEY    1
#endif
#if BIG_TAB
#define TAB_STR     " (Big table)"
#else
#define TAB_STR
#endif
#ifdef COMPILE_KEY
#define MOD_STRING  "(Compiled subkeys)" TAB_STR
#else
#define MOD_STRING  "(Full keying)" TAB_STR
#endif
/* Fe32_ does a full S-box + MDS lookup.  Need to #define _sBox_ before use.
   Note that we "interleave" 0,1, and 2,3 to avoid cache bank collisions
   in optimized assembly language.
*/
#define Fe32_(x,R) (_sBox_[0][2*_b(x,R  )] ^ _sBox_[0][2*_b(x,R+1)+1] ^ \
                    _sBox_[2][2*_b(x,R+2)] ^ _sBox_[2][2*_b(x,R+3)+1])
/* set a single S-box value, given the input byte */
#define sbSet(N,i,J,v) { _sBox_[N&2][2*i+(N&1)+2*J]=MDStab[N][v]; }
#define GetSboxKey
#endif
141 |
|
142 |
/* compile-time switches for debugging help */
#define CHECK_TABLE     0       /* nonzero --> compare against "slow" table */
#define VALIDATE_PARMS  0       /* nonzero --> check parameters; disable for full speed */

/* end of debug switches */
147 |
|
148 |
#ifdef GetCodeSize
/*
 * A declaration may carry at most one storage-class specifier, so the former
 * "static extern" spelling could not compile.  No definition of Here() is
 * visible in this file, so it is declared with external linkage.
 * NOTE(review): confirm that Here() is provided by another translation unit.
 */
extern DWORD Here(DWORD x);         /* return caller's address! */
/* returns whatever Here(0) reports when called from this function */
static DWORD TwofishCodeStart(void) { return Here(0); }
#endif
152 |
|
153 |
/* |
154 |
+***************************************************************************** |
155 |
* |
156 |
* Function Name: TableOp |
157 |
* |
158 |
* Function: Handle table use checking |
159 |
* |
160 |
* Arguments: op = what to do (see TAB_* defns in AES.H) |
161 |
* |
162 |
* Return: TRUE --> done (for TAB_QUERY) |
163 |
* |
164 |
* Notes: This routine is for use in generating the tables KAT file. |
165 |
* For this optimized version, we don't actually track table usage, |
166 |
* since it would make the macros incredibly ugly. Instead we just |
167 |
* run for a fixed number of queries and then say we're done. |
168 |
* |
169 |
-****************************************************************************/ |
170 |
static int TableOp(int op) |
171 |
{ |
172 |
static int queryCnt=0; |
173 |
|
174 |
switch (op) |
175 |
{ |
176 |
case TAB_DISABLE: |
177 |
break; |
178 |
case TAB_ENABLE: |
179 |
break; |
180 |
case TAB_RESET: |
181 |
queryCnt=0; |
182 |
break; |
183 |
case TAB_QUERY: |
184 |
queryCnt++; |
185 |
if (queryCnt < TAB_MIN_QUERY) |
186 |
return FALSE; |
187 |
} |
188 |
return TRUE; |
189 |
} |
190 |
|
191 |
|
192 |
#if CHECK_TABLE
/*
+*****************************************************************************
*
* Function Name:    f32
*
* Function:         Run four bytes through keyed S-boxes and apply MDS matrix
*
* Arguments:        x       =   input to f function
*                   k32     =   pointer to key dwords
*                   keyLen  =   total key length in bits
*
* Return:           The output of the keyed permutation applied to x.
*
* Notes:
*   Slow reference version, compiled only when CHECK_TABLE is nonzero, where
*   reKey() uses it to cross-check the table-driven results.
*
*   The number of 64-bit key words, (keyLen+63)/64, selects how many
*   S-box/xor stages each byte passes through.  The cases fall through on
*   purpose: longer keys perform the extra leading stages and then share
*   the stages of the shorter keys.  A selector value of 1 matches no case,
*   so the bytes then go to the MDS multiply unchanged.
*
*   The MDS multiply is written out with the Mij() macros.
*
-****************************************************************************/
static DWORD f32(DWORD x,CONST DWORD *k32,int keyLen)
{
    BYTE b[4];

    /* Run each byte thru 8x8 S-boxes, xoring with key byte at each stage. */
    /* Note that each byte goes through a different combination of S-boxes.*/

    *((DWORD *)b) = Bswap(x);   /* make b[0] = LSB, b[3] = MSB */

    switch (((keyLen + 63)/64) & 3)
    {
        case 0:     /* 256 bits of key */
            b[0] = p8(04)[b[0]] ^ b0(k32[3]);
            b[1] = p8(14)[b[1]] ^ b1(k32[3]);
            b[2] = p8(24)[b[2]] ^ b2(k32[3]);
            b[3] = p8(34)[b[3]] ^ b3(k32[3]);
            /* fall thru, having pre-processed b[0]..b[3] with k32[3] */
        case 3:     /* 192 bits of key */
            b[0] = p8(03)[b[0]] ^ b0(k32[2]);
            b[1] = p8(13)[b[1]] ^ b1(k32[2]);
            b[2] = p8(23)[b[2]] ^ b2(k32[2]);
            b[3] = p8(33)[b[3]] ^ b3(k32[2]);
            /* fall thru, having pre-processed b[0]..b[3] with k32[2] */
        case 2:     /* 128 bits of key */
            b[0] = p8(00)[p8(01)[p8(02)[b[0]] ^ b0(k32[1])] ^ b0(k32[0])];
            b[1] = p8(10)[p8(11)[p8(12)[b[1]] ^ b1(k32[1])] ^ b1(k32[0])];
            b[2] = p8(20)[p8(21)[p8(22)[b[2]] ^ b2(k32[1])] ^ b2(k32[0])];
            b[3] = p8(30)[p8(31)[p8(32)[b[3]] ^ b3(k32[1])] ^ b3(k32[0])];
    }

    /* Now perform the MDS matrix multiply inline: one result byte per row. */
    return  ((M00(b[0]) ^ M01(b[1]) ^ M02(b[2]) ^ M03(b[3]))      ) ^
            ((M10(b[0]) ^ M11(b[1]) ^ M12(b[2]) ^ M13(b[3])) <<  8) ^
            ((M20(b[0]) ^ M21(b[1]) ^ M22(b[2]) ^ M23(b[3])) << 16) ^
            ((M30(b[0]) ^ M31(b[1]) ^ M32(b[2]) ^ M33(b[3])) << 24) ;
}
#endif  /* CHECK_TABLE */
258 |
|
259 |
|
260 |
/* |
261 |
+***************************************************************************** |
262 |
* |
263 |
* Function Name: RS_MDS_encode |
264 |
* |
265 |
* Function: Use (12,8) Reed-Solomon code over GF(256) to produce |
266 |
* a key S-box dword from two key material dwords. |
267 |
* |
268 |
* Arguments: k0 = 1st dword |
269 |
* k1 = 2nd dword |
270 |
* |
271 |
* Return: Remainder polynomial generated using RS code |
272 |
* |
273 |
* Notes: |
274 |
* Since this computation is done only once per reKey per 64 bits of key, |
275 |
* the performance impact of this routine is imperceptible. The RS code |
276 |
* chosen has "simple" coefficients to allow smartcard/hardware implementation |
277 |
* without lookup tables. |
278 |
* |
279 |
-****************************************************************************/ |
280 |
static DWORD RS_MDS_Encode(DWORD k0,DWORD k1) |
281 |
{ |
282 |
int i,j; |
283 |
DWORD r; |
284 |
|
285 |
for (i=r=0;i<2;i++) |
286 |
{ |
287 |
r ^= (i) ? k0 : k1; /* merge in 32 more key bits */ |
288 |
for (j=0;j<4;j++) /* shift one byte at a time */ |
289 |
RS_rem(r); |
290 |
} |
291 |
return r; |
292 |
} |
293 |
|
294 |
|
295 |
/* |
296 |
+***************************************************************************** |
297 |
* |
298 |
* Function Name: BuildMDS |
299 |
* |
300 |
* Function: Initialize the MDStab array |
301 |
* |
302 |
* Arguments: None. |
303 |
* |
304 |
* Return: None. |
305 |
* |
306 |
* Notes: |
307 |
* Here we precompute all the fixed MDS table. This only needs to be done |
308 |
* one time at initialization, after which the table is "CONST". |
309 |
* |
310 |
-****************************************************************************/ |
311 |
static void BuildMDS(void) |
312 |
{ |
313 |
int i; |
314 |
DWORD d; |
315 |
BYTE m1[2],mX[2],mY[4]; |
316 |
|
317 |
for (i=0;i<256;i++) |
318 |
{ |
319 |
m1[0]=P8x8[0][i]; /* compute all the matrix elements */ |
320 |
mX[0]=(BYTE) Mul_X(m1[0]); |
321 |
mY[0]=(BYTE) Mul_Y(m1[0]); |
322 |
|
323 |
m1[1]=P8x8[1][i]; |
324 |
mX[1]=(BYTE) Mul_X(m1[1]); |
325 |
mY[1]=(BYTE) Mul_Y(m1[1]); |
326 |
|
327 |
#undef Mul_1 /* change what the pre-processor does with Mij */ |
328 |
#undef Mul_X |
329 |
#undef Mul_Y |
330 |
#define Mul_1 m1 /* It will now access m01[], m5B[], and mEF[] */ |
331 |
#define Mul_X mX |
332 |
#define Mul_Y mY |
333 |
|
334 |
#define SetMDS(N) \ |
335 |
b0(d) = M0##N[P_##N##0]; \ |
336 |
b1(d) = M1##N[P_##N##0]; \ |
337 |
b2(d) = M2##N[P_##N##0]; \ |
338 |
b3(d) = M3##N[P_##N##0]; \ |
339 |
MDStab[N][i] = d; |
340 |
|
341 |
SetMDS(0); /* fill in the matrix with elements computed above */ |
342 |
SetMDS(1); |
343 |
SetMDS(2); |
344 |
SetMDS(3); |
345 |
} |
346 |
#undef Mul_1 |
347 |
#undef Mul_X |
348 |
#undef Mul_Y |
349 |
#define Mul_1 Mx_1 /* re-enable true multiply */ |
350 |
#define Mul_X Mx_X |
351 |
#define Mul_Y Mx_Y |
352 |
|
353 |
#if BIG_TAB |
354 |
{ |
355 |
int j,k; |
356 |
BYTE *q0,*q1; |
357 |
|
358 |
for (i=0;i<4;i++) |
359 |
{ |
360 |
switch (i) |
361 |
{ |
362 |
case 0: q0=p8(01); q1=p8(02); break; |
363 |
case 1: q0=p8(11); q1=p8(12); break; |
364 |
case 2: q0=p8(21); q1=p8(22); break; |
365 |
case 3: q0=p8(31); q1=p8(32); break; |
366 |
} |
367 |
for (j=0;j<256;j++) |
368 |
for (k=0;k<256;k++) |
369 |
bigTab[i][j][k]=q0[q1[k]^j]; |
370 |
} |
371 |
} |
372 |
#endif |
373 |
|
374 |
needToBuildMDS=0; /* NEVER modify the table again! */ |
375 |
} |
376 |
|
377 |
/* |
378 |
+***************************************************************************** |
379 |
* |
380 |
* Function Name: ReverseRoundSubkeys |
381 |
* |
382 |
* Function: Reverse order of round subkeys to switch between encrypt/decrypt |
383 |
* |
384 |
* Arguments: key = ptr to keyInstance to be reversed |
385 |
* newDir = new direction value |
386 |
* |
387 |
* Return: None. |
388 |
* |
389 |
* Notes: |
390 |
* This optimization allows both blockEncrypt and blockDecrypt to use the same |
391 |
* "fallthru" switch statement based on the number of rounds. |
392 |
* Note that key->numRounds must be even and >= 2 here. |
393 |
* |
394 |
-****************************************************************************/ |
395 |
static void ReverseRoundSubkeys(keyInstance *key,BYTE newDir) |
396 |
{ |
397 |
DWORD t0,t1; |
398 |
register DWORD *r0=key->subKeys+ROUND_SUBKEYS; |
399 |
register DWORD *r1=r0 + 2*key->numRounds - 2; |
400 |
|
401 |
for (;r0 < r1;r0+=2,r1-=2) |
402 |
{ |
403 |
t0=r0[0]; /* swap the order */ |
404 |
t1=r0[1]; |
405 |
r0[0]=r1[0]; /* but keep relative order within pairs */ |
406 |
r0[1]=r1[1]; |
407 |
r1[0]=t0; |
408 |
r1[1]=t1; |
409 |
} |
410 |
|
411 |
key->direction=newDir; |
412 |
} |
413 |
|
414 |
/* |
415 |
+***************************************************************************** |
416 |
* |
417 |
* Function Name: Xor256 |
418 |
* |
419 |
* Function: Copy an 8-bit permutation (256 bytes), xoring with a byte |
420 |
* |
421 |
* Arguments: dst = where to put result |
422 |
* src = where to get data (can be same asa dst) |
423 |
* b = byte to xor |
424 |
* |
425 |
* Return: None |
426 |
* |
427 |
* Notes: |
428 |
* BorlandC's optimization is terrible! When we put the code inline, |
429 |
* it generates fairly good code in the *following* segment (not in the Xor256 |
430 |
* code itself). If the call is made, the code following the call is awful! |
431 |
* The penalty is nearly 50%! So we take the code size hit for inlining for |
432 |
* Borland, while Microsoft happily works with a call. |
433 |
* |
434 |
-****************************************************************************/ |
435 |
#if defined(__BORLANDC__) /* do it inline */ |
436 |
#define Xor32(dst,src,i) { ((DWORD *)dst)[i] = ((DWORD *)src)[i] ^ tmpX; } |
437 |
#define Xor256(dst,src,b) \ |
438 |
{ \ |
439 |
register DWORD tmpX=0x01010101u * b;\ |
440 |
for (i=0;i<64;i+=4) \ |
441 |
{ Xor32(dst,src,i ); Xor32(dst,src,i+1); Xor32(dst,src,i+2); Xor32(dst,src,i+3); } \ |
442 |
} |
443 |
#else /* do it as a function call */ |
444 |
static void Xor256(void *dst,void *src,BYTE b) |
445 |
{ |
446 |
register DWORD x=b*0x01010101u; /* replicate byte to all four bytes */ |
447 |
register DWORD *d=(DWORD *)dst; |
448 |
register DWORD *s=(DWORD *)src; |
449 |
#define X_8(N) { d[N]=s[N] ^ x; d[N+1]=s[N+1] ^ x; } |
450 |
#define X_32(N) { X_8(N); X_8(N+2); X_8(N+4); X_8(N+6); } |
451 |
X_32(0 ); X_32( 8); X_32(16); X_32(24); /* all inline */ |
452 |
d+=32; /* keep offsets small! */ |
453 |
s+=32; |
454 |
X_32(0 ); X_32( 8); X_32(16); X_32(24); /* all inline */ |
455 |
} |
456 |
#endif |
457 |
|
458 |
/* |
459 |
+***************************************************************************** |
460 |
* |
461 |
* Function Name: reKey |
462 |
* |
463 |
* Function: Initialize the Twofish key schedule from key32 |
464 |
* |
465 |
* Arguments: key = ptr to keyInstance to be initialized |
466 |
* |
467 |
* Return: TRUE on success |
468 |
* |
469 |
* Notes: |
470 |
* Here we precompute all the round subkeys, although that is not actually |
471 |
* required. For example, on a smartcard, the round subkeys can |
472 |
* be generated on-the-fly using f32() |
473 |
* |
474 |
-****************************************************************************/ |
475 |
static int reKey(keyInstance *key) |
476 |
{ |
477 |
int i,j,k64Cnt,keyLen; |
478 |
int subkeyCnt; |
479 |
DWORD A=0,B=0,q; |
480 |
DWORD sKey[MAX_KEY_BITS/64],k32e[MAX_KEY_BITS/64],k32o[MAX_KEY_BITS/64]; |
481 |
BYTE L0[256],L1[256]; /* small local 8-bit permutations */ |
482 |
|
483 |
#if VALIDATE_PARMS |
484 |
#if ALIGN32 |
485 |
if (((int)key) & 3) |
486 |
return BAD_ALIGN32; |
487 |
if ((key->keyLen % 64) || (key->keyLen < MIN_KEY_BITS)) |
488 |
return BAD_KEY_INSTANCE; |
489 |
#endif |
490 |
#endif |
491 |
|
492 |
if (needToBuildMDS) /* do this one time only */ |
493 |
BuildMDS(); |
494 |
|
495 |
#define F32(res,x,k32) \ |
496 |
{ \ |
497 |
DWORD t=x; \ |
498 |
switch (k64Cnt & 3) \ |
499 |
{ \ |
500 |
case 0: /* same as 4 */ \ |
501 |
b0(t) = p8(04)[b0(t)] ^ b0(k32[3]); \ |
502 |
b1(t) = p8(14)[b1(t)] ^ b1(k32[3]); \ |
503 |
b2(t) = p8(24)[b2(t)] ^ b2(k32[3]); \ |
504 |
b3(t) = p8(34)[b3(t)] ^ b3(k32[3]); \ |
505 |
/* fall thru, having pre-processed t */ \ |
506 |
case 3: b0(t) = p8(03)[b0(t)] ^ b0(k32[2]); \ |
507 |
b1(t) = p8(13)[b1(t)] ^ b1(k32[2]); \ |
508 |
b2(t) = p8(23)[b2(t)] ^ b2(k32[2]); \ |
509 |
b3(t) = p8(33)[b3(t)] ^ b3(k32[2]); \ |
510 |
/* fall thru, having pre-processed t */ \ |
511 |
case 2: /* 128-bit keys (optimize for this case) */ \ |
512 |
res= MDStab[0][p8(01)[p8(02)[b0(t)] ^ b0(k32[1])] ^ b0(k32[0])] ^ \ |
513 |
MDStab[1][p8(11)[p8(12)[b1(t)] ^ b1(k32[1])] ^ b1(k32[0])] ^ \ |
514 |
MDStab[2][p8(21)[p8(22)[b2(t)] ^ b2(k32[1])] ^ b2(k32[0])] ^ \ |
515 |
MDStab[3][p8(31)[p8(32)[b3(t)] ^ b3(k32[1])] ^ b3(k32[0])] ; \ |
516 |
} \ |
517 |
} |
518 |
|
519 |
|
520 |
#if !CHECK_TABLE |
521 |
#if defined(USE_ASM) /* only do this if not using assember */ |
522 |
if (!(useAsm & 4)) |
523 |
#endif |
524 |
#endif |
525 |
{ |
526 |
subkeyCnt = ROUND_SUBKEYS + 2*key->numRounds; |
527 |
keyLen=key->keyLen; |
528 |
k64Cnt=(keyLen+63)/64; /* number of 64-bit key words */ |
529 |
for (i=0,j=k64Cnt-1;i<k64Cnt;i++,j--) |
530 |
{ /* split into even/odd key dwords */ |
531 |
k32e[i]=key->key32[2*i ]; |
532 |
k32o[i]=key->key32[2*i+1]; |
533 |
/* compute S-box keys using (12,8) Reed-Solomon code over GF(256) */ |
534 |
sKey[j]=key->sboxKeys[j]=RS_MDS_Encode(k32e[i],k32o[i]); /* reverse order */ |
535 |
} |
536 |
} |
537 |
|
538 |
#ifdef USE_ASM |
539 |
if (useAsm & 4) |
540 |
{ |
541 |
#if defined(COMPILE_KEY) && defined(USE_ASM) |
542 |
key->keySig = VALID_SIG; /* show that we are initialized */ |
543 |
key->codeSize = sizeof(key->compiledCode); /* set size */ |
544 |
#endif |
545 |
reKey_86(key); |
546 |
} |
547 |
else |
548 |
#endif |
549 |
{ |
550 |
for (i=q=0;i<subkeyCnt/2;i++,q+=SK_STEP) |
551 |
{ /* compute round subkeys for PHT */ |
552 |
F32(A,q ,k32e); /* A uses even key dwords */ |
553 |
F32(B,q+SK_BUMP,k32o); /* B uses odd key dwords */ |
554 |
B = ROL(B,8); |
555 |
key->subKeys[2*i ] = A+B; /* combine with a PHT */ |
556 |
B = A + 2*B; |
557 |
key->subKeys[2*i+1] = ROL(B,SK_ROTL); |
558 |
} |
559 |
#if !defined(ZERO_KEY) |
560 |
switch (keyLen) /* case out key length for speed in generating S-boxes */ |
561 |
{ |
562 |
case 128: |
563 |
#if defined(FULL_KEY) || defined(PART_KEY) |
564 |
#if BIG_TAB |
565 |
#define one128(N,J) sbSet(N,i,J,L0[i+J]) |
566 |
#define sb128(N) { \ |
567 |
BYTE *qq=bigTab[N][b##N(sKey[1])]; \ |
568 |
Xor256(L0,qq,b##N(sKey[0])); \ |
569 |
for (i=0;i<256;i+=2) { one128(N,0); one128(N,1); } } |
570 |
#else |
571 |
#define one128(N,J) sbSet(N,i,J,p8(N##1)[L0[i+J]]^k0) |
572 |
#define sb128(N) { \ |
573 |
Xor256(L0,p8(N##2),b##N(sKey[1])); \ |
574 |
{ register DWORD k0=b##N(sKey[0]); \ |
575 |
for (i=0;i<256;i+=2) { one128(N,0); one128(N,1); } } } |
576 |
#endif |
577 |
#elif defined(MIN_KEY) |
578 |
#define sb128(N) Xor256(_sBox8_(N),p8(N##2),b##N(sKey[1])) |
579 |
#endif |
580 |
sb128(0); sb128(1); sb128(2); sb128(3); |
581 |
break; |
582 |
case 192: |
583 |
#if defined(FULL_KEY) || defined(PART_KEY) |
584 |
#define one192(N,J) sbSet(N,i,J,p8(N##1)[p8(N##2)[L0[i+J]]^k1]^k0) |
585 |
#define sb192(N) { \ |
586 |
Xor256(L0,p8(N##3),b##N(sKey[2])); \ |
587 |
{ register DWORD k0=b##N(sKey[0]); \ |
588 |
register DWORD k1=b##N(sKey[1]); \ |
589 |
for (i=0;i<256;i+=2) { one192(N,0); one192(N,1); } } } |
590 |
#elif defined(MIN_KEY) |
591 |
#define one192(N,J) sbSet(N,i,J,p8(N##2)[L0[i+J]]^k1) |
592 |
#define sb192(N) { \ |
593 |
Xor256(L0,p8(N##3),b##N(sKey[2])); \ |
594 |
{ register DWORD k1=b##N(sKey[1]); \ |
595 |
for (i=0;i<256;i+=2) { one192(N,0); one192(N,1); } } } |
596 |
#endif |
597 |
sb192(0); sb192(1); sb192(2); sb192(3); |
598 |
break; |
599 |
case 256: |
600 |
#if defined(FULL_KEY) || defined(PART_KEY) |
601 |
#define one256(N,J) sbSet(N,i,J,p8(N##1)[p8(N##2)[L0[i+J]]^k1]^k0) |
602 |
#define sb256(N) { \ |
603 |
Xor256(L1,p8(N##4),b##N(sKey[3])); \ |
604 |
for (i=0;i<256;i+=2) {L0[i ]=p8(N##3)[L1[i]]; \ |
605 |
L0[i+1]=p8(N##3)[L1[i+1]]; } \ |
606 |
Xor256(L0,L0,b##N(sKey[2])); \ |
607 |
{ register DWORD k0=b##N(sKey[0]); \ |
608 |
register DWORD k1=b##N(sKey[1]); \ |
609 |
for (i=0;i<256;i+=2) { one256(N,0); one256(N,1); } } } |
610 |
#elif defined(MIN_KEY) |
611 |
#define one256(N,J) sbSet(N,i,J,p8(N##2)[L0[i+J]]^k1) |
612 |
#define sb256(N) { \ |
613 |
Xor256(L1,p8(N##4),b##N(sKey[3])); \ |
614 |
for (i=0;i<256;i+=2) {L0[i ]=p8(N##3)[L1[i]]; \ |
615 |
L0[i+1]=p8(N##3)[L1[i+1]]; } \ |
616 |
Xor256(L0,L0,b##N(sKey[2])); \ |
617 |
{ register DWORD k1=b##N(sKey[1]); \ |
618 |
for (i=0;i<256;i+=2) { one256(N,0); one256(N,1); } } } |
619 |
#endif |
620 |
sb256(0); sb256(1); sb256(2); sb256(3); |
621 |
break; |
622 |
} |
623 |
#endif |
624 |
} |
625 |
|
626 |
#if CHECK_TABLE /* sanity check vs. pedagogical code*/ |
627 |
{ |
628 |
GetSboxKey; |
629 |
for (i=0;i<subkeyCnt/2;i++) |
630 |
{ |
631 |
A = f32(i*SK_STEP ,k32e,keyLen); /* A uses even key dwords */ |
632 |
B = f32(i*SK_STEP+SK_BUMP,k32o,keyLen); /* B uses odd key dwords */ |
633 |
B = ROL(B,8); |
634 |
assert(key->subKeys[2*i ] == A+ B); |
635 |
assert(key->subKeys[2*i+1] == ROL(A+2*B,SK_ROTL)); |
636 |
} |
637 |
#if !defined(ZERO_KEY) /* any S-boxes to check? */ |
638 |
for (i=q=0;i<256;i++,q+=0x01010101) |
639 |
assert(f32(q,key->sboxKeys,keyLen) == Fe32_(q,0)); |
640 |
#endif |
641 |
} |
642 |
#endif /* CHECK_TABLE */ |
643 |
|
644 |
if (key->direction == DIR_ENCRYPT) |
645 |
ReverseRoundSubkeys(key,DIR_ENCRYPT); /* reverse the round subkey order */ |
646 |
|
647 |
return TRUE; |
648 |
} |
649 |
/* |
650 |
+***************************************************************************** |
651 |
* |
652 |
* Function Name: makeKey |
653 |
* |
654 |
* Function: Initialize the Twofish key schedule |
655 |
* |
656 |
* Arguments: key = ptr to keyInstance to be initialized |
657 |
* direction = DIR_ENCRYPT or DIR_DECRYPT |
658 |
* keyLen = # bits of key text at *keyMaterial |
659 |
* keyMaterial = ptr to hex ASCII chars representing key bits |
660 |
* |
661 |
* Return: TRUE on success |
662 |
* else error code (e.g., BAD_KEY_DIR) |
663 |
* |
664 |
* Notes: This parses the key bits from keyMaterial. Zeroes out unused key bits |
665 |
* |
666 |
-****************************************************************************/ |
667 |
static int makeKey(keyInstance *key, BYTE direction, int keyLen,CONST char *keyMaterial) |
668 |
{ |
669 |
int i; |
670 |
|
671 |
#if VALIDATE_PARMS /* first, sanity check on parameters */ |
672 |
if (key == NULL) |
673 |
return BAD_KEY_INSTANCE;/* must have a keyInstance to initialize */ |
674 |
if ((direction != DIR_ENCRYPT) && (direction != DIR_DECRYPT)) |
675 |
return BAD_KEY_DIR; /* must have valid direction */ |
676 |
if ((keyLen > MAX_KEY_BITS) || (keyLen < 8) || (keyLen & 0x3F)) |
677 |
return BAD_KEY_MAT; /* length must be valid */ |
678 |
key->keySig = VALID_SIG; /* show that we are initialized */ |
679 |
#if ALIGN32 |
680 |
if ((((int)key) & 3) || (((int)key->key32) & 3)) |
681 |
return BAD_ALIGN32; |
682 |
#endif |
683 |
#endif |
684 |
|
685 |
key->direction = direction;/* set our cipher direction */ |
686 |
key->keyLen = (keyLen+63) & ~63; /* round up to multiple of 64 */ |
687 |
key->numRounds = numRounds[(keyLen-1)/64]; |
688 |
memset(key->key32,0,sizeof(key->key32)); /* zero unused bits */ |
689 |
|
690 |
if (keyMaterial == NULL) |
691 |
return TRUE; /* allow a "dummy" call */ |
692 |
|
693 |
for (i=0;i<keyLen/32;i++) /* make byte-oriented copy for CFB1 */ |
694 |
key->key32[i] = (((unsigned char *)keyMaterial)[i*4+0] << 0) |
695 |
| (((unsigned char *)keyMaterial)[i*4+1] << 8) |
696 |
| (((unsigned char *)keyMaterial)[i*4+2] << 16) |
697 |
| (((unsigned char *)keyMaterial)[i*4+3] << 24); |
698 |
|
699 |
return reKey(key); /* generate round subkeys */ |
700 |
} |
701 |
|
702 |
|
703 |
/* |
704 |
+***************************************************************************** |
705 |
* |
706 |
* Function Name: cipherInit |
707 |
* |
708 |
* Function: Initialize the Twofish cipher in a given mode |
709 |
* |
710 |
* Arguments: cipher = ptr to cipherInstance to be initialized |
711 |
* mode = MODE_ECB, MODE_CBC, or MODE_CFB1 |
712 |
* IV = ptr to hex ASCII test representing IV bytes |
713 |
* |
714 |
* Return: TRUE on success |
715 |
* else error code (e.g., BAD_CIPHER_MODE) |
716 |
* |
717 |
-****************************************************************************/ |
718 |
static int cipherInit(cipherInstance *cipher, BYTE mode,CONST char *IV) |
719 |
{ |
720 |
int i; |
721 |
#if VALIDATE_PARMS /* first, sanity check on parameters */ |
722 |
if (cipher == NULL) |
723 |
return BAD_PARAMS; /* must have a cipherInstance to initialize */ |
724 |
if ((mode != MODE_ECB) && (mode != MODE_CBC) && (mode != MODE_CFB1)) |
725 |
return BAD_CIPHER_MODE; /* must have valid cipher mode */ |
726 |
cipher->cipherSig = VALID_SIG; |
727 |
#if ALIGN32 |
728 |
if ((((int)cipher) & 3) || (((int)cipher->IV) & 3) || (((int)cipher->iv32) & 3)) |
729 |
return BAD_ALIGN32; |
730 |
#endif |
731 |
#endif |
732 |
|
733 |
if ((mode != MODE_ECB) && (IV)) /* parse the IV */ |
734 |
{ |
735 |
memcpy (cipher->iv32, IV, BLOCK_SIZE/32); |
736 |
for (i=0;i<BLOCK_SIZE/32;i++) /* make byte-oriented copy for CFB1 */ |
737 |
((DWORD *)cipher->IV)[i] = Bswap(cipher->iv32[i]); |
738 |
} |
739 |
|
740 |
cipher->mode = mode; |
741 |
|
742 |
return TRUE; |
743 |
} |
744 |
|
745 |
/* |
746 |
+***************************************************************************** |
747 |
* |
748 |
* Function Name: blockEncrypt |
749 |
* |
750 |
* Function: Encrypt block(s) of data using Twofish |
751 |
* |
752 |
* Arguments: cipher = ptr to already initialized cipherInstance |
753 |
* key = ptr to already initialized keyInstance |
754 |
* input = ptr to data blocks to be encrypted |
755 |
* inputLen = # bits to encrypt (multiple of blockSize) |
756 |
* outBuffer = ptr to where to put encrypted blocks |
757 |
* |
758 |
* Return: # bits ciphered (>= 0) |
759 |
* else error code (e.g., BAD_CIPHER_STATE, BAD_KEY_MATERIAL) |
760 |
* |
761 |
* Notes: The only supported block size for ECB/CBC modes is BLOCK_SIZE bits. |
762 |
* If inputLen is not a multiple of BLOCK_SIZE bits in those modes, |
763 |
* an error BAD_INPUT_LEN is returned. In CFB1 mode, all block |
764 |
* sizes can be supported. |
765 |
* |
766 |
-****************************************************************************/ |
767 |
static int blockEncrypt(cipherInstance *cipher, keyInstance *key,CONST BYTE *input, |
768 |
int inputLen, BYTE *outBuffer) |
769 |
{ |
770 |
int i,n; /* loop counters */ |
771 |
DWORD x[BLOCK_SIZE/32]; /* block being encrypted */ |
772 |
DWORD t0,t1; /* temp variables */ |
773 |
int rounds=key->numRounds; /* number of rounds */ |
774 |
BYTE bit,bit0,ctBit,carry; /* temps for CFB */ |
775 |
|
776 |
/* make local copies of things for faster access */ |
777 |
int mode = cipher->mode; |
778 |
DWORD sk[TOTAL_SUBKEYS]; |
779 |
DWORD IV[BLOCK_SIZE/32]; |
780 |
|
781 |
GetSboxKey; |
782 |
|
783 |
#if VALIDATE_PARMS |
784 |
if ((cipher == NULL) || (cipher->cipherSig != VALID_SIG)) |
785 |
return BAD_CIPHER_STATE; |
786 |
if ((key == NULL) || (key->keySig != VALID_SIG)) |
787 |
return BAD_KEY_INSTANCE; |
788 |
if ((rounds < 2) || (rounds > MAX_ROUNDS) || (rounds&1)) |
789 |
return BAD_KEY_INSTANCE; |
790 |
if ((mode != MODE_CFB1) && (inputLen % BLOCK_SIZE)) |
791 |
return BAD_INPUT_LEN; |
792 |
#if ALIGN32 |
793 |
if ( (((int)cipher) & 3) || (((int)key ) & 3) || |
794 |
(((int)input ) & 3) || (((int)outBuffer) & 3)) |
795 |
return BAD_ALIGN32; |
796 |
#endif |
797 |
#endif |
798 |
|
799 |
if (mode == MODE_CFB1) |
800 |
{ /* use recursion here to handle CFB, one block at a time */ |
801 |
cipher->mode = MODE_ECB; /* do encryption in ECB */ |
802 |
for (n=0;n<inputLen;n++) |
803 |
{ |
804 |
blockEncrypt(cipher,key,cipher->IV,BLOCK_SIZE,(BYTE *)x); |
805 |
bit0 = 0x80 >> (n & 7);/* which bit position in byte */ |
806 |
ctBit = (input[n/8] & bit0) ^ ((((BYTE *) x)[0] & 0x80) >> (n&7)); |
807 |
outBuffer[n/8] = (outBuffer[n/8] & ~ bit0) | ctBit; |
808 |
carry = ctBit >> (7 - (n&7)); |
809 |
for (i=BLOCK_SIZE/8-1;i>=0;i--) |
810 |
{ |
811 |
bit = cipher->IV[i] >> 7; /* save next "carry" from shift */ |
812 |
cipher->IV[i] = (cipher->IV[i] << 1) ^ carry; |
813 |
carry = bit; |
814 |
} |
815 |
} |
816 |
cipher->mode = MODE_CFB1; /* restore mode for next time */ |
817 |
return inputLen; |
818 |
} |
819 |
|
820 |
/* here for ECB, CBC modes */ |
821 |
if (key->direction != DIR_ENCRYPT) |
822 |
ReverseRoundSubkeys(key,DIR_ENCRYPT); /* reverse the round subkey order */ |
823 |
|
824 |
#ifdef USE_ASM |
825 |
if ((useAsm & 1) && (inputLen)) |
826 |
#ifdef COMPILE_KEY |
827 |
if (key->keySig == VALID_SIG) |
828 |
return ((CipherProc *)(key->encryptFuncPtr))(cipher,key,input,inputLen,outBuffer); |
829 |
#else |
830 |
return (*blockEncrypt_86)(cipher,key,input,inputLen,outBuffer); |
831 |
#endif |
832 |
#endif |
833 |
/* make local copy of subkeys for speed */ |
834 |
memcpy(sk,key->subKeys,sizeof(DWORD)*(ROUND_SUBKEYS+2*rounds)); |
835 |
if (mode == MODE_CBC) |
836 |
BlockCopy(IV,cipher->iv32) |
837 |
else |
838 |
IV[0]=IV[1]=IV[2]=IV[3]=0; |
839 |
|
840 |
for (n=0;n<inputLen;n+=BLOCK_SIZE,input+=BLOCK_SIZE/8,outBuffer+=BLOCK_SIZE/8) |
841 |
{ |
842 |
#define LoadBlockE(N) x[N]=Bswap(((DWORD *)input)[N]) ^ sk[INPUT_WHITEN+N] ^ IV[N] |
843 |
LoadBlockE(0); LoadBlockE(1); LoadBlockE(2); LoadBlockE(3); |
844 |
#define EncryptRound(K,R,id) \ |
845 |
t0 = Fe32##id(x[K ],0); \ |
846 |
t1 = Fe32##id(x[K^1],3); \ |
847 |
x[K^3] = ROL(x[K^3],1); \ |
848 |
x[K^2]^= t0 + t1 + sk[ROUND_SUBKEYS+2*(R) ]; \ |
849 |
x[K^3]^= t0 + 2*t1 + sk[ROUND_SUBKEYS+2*(R)+1]; \ |
850 |
x[K^2] = ROR(x[K^2],1); |
851 |
#define Encrypt2(R,id) { EncryptRound(0,R+1,id); EncryptRound(2,R,id); } |
852 |
|
853 |
#if defined(ZERO_KEY) |
854 |
switch (key->keyLen) |
855 |
{ |
856 |
case 128: |
857 |
for (i=rounds-2;i>=0;i-=2) |
858 |
Encrypt2(i,_128); |
859 |
break; |
860 |
case 192: |
861 |
for (i=rounds-2;i>=0;i-=2) |
862 |
Encrypt2(i,_192); |
863 |
break; |
864 |
case 256: |
865 |
for (i=rounds-2;i>=0;i-=2) |
866 |
Encrypt2(i,_256); |
867 |
break; |
868 |
} |
869 |
#else |
870 |
Encrypt2(14,_); |
871 |
Encrypt2(12,_); |
872 |
Encrypt2(10,_); |
873 |
Encrypt2( 8,_); |
874 |
Encrypt2( 6,_); |
875 |
Encrypt2( 4,_); |
876 |
Encrypt2( 2,_); |
877 |
Encrypt2( 0,_); |
878 |
#endif |
879 |
|
880 |
/* need to do (or undo, depending on your point of view) final swap */ |
881 |
#if LittleEndian |
882 |
#define StoreBlockE(N) ((DWORD *)outBuffer)[N]=x[N^2] ^ sk[OUTPUT_WHITEN+N] |
883 |
#else |
884 |
#define StoreBlockE(N) { t0=x[N^2] ^ sk[OUTPUT_WHITEN+N]; ((DWORD *)outBuffer)[N]=Bswap(t0); } |
885 |
#endif |
886 |
StoreBlockE(0); StoreBlockE(1); StoreBlockE(2); StoreBlockE(3); |
887 |
if (mode == MODE_CBC) |
888 |
{ |
889 |
IV[0]=Bswap(((DWORD *)outBuffer)[0]); |
890 |
IV[1]=Bswap(((DWORD *)outBuffer)[1]); |
891 |
IV[2]=Bswap(((DWORD *)outBuffer)[2]); |
892 |
IV[3]=Bswap(((DWORD *)outBuffer)[3]); |
893 |
} |
894 |
} |
895 |
|
896 |
if (mode == MODE_CBC) |
897 |
BlockCopy(cipher->iv32,IV); |
898 |
|
899 |
return inputLen; |
900 |
} |
901 |
|
902 |
/* |
903 |
+***************************************************************************** |
904 |
* |
905 |
* Function Name: blockDecrypt |
906 |
* |
907 |
* Function: Decrypt block(s) of data using Twofish |
908 |
* |
909 |
* Arguments: cipher = ptr to already initialized cipherInstance |
910 |
* key = ptr to already initialized keyInstance |
911 |
* input = ptr to data blocks to be decrypted |
912 |
* inputLen = # bits to encrypt (multiple of blockSize) |
913 |
* outBuffer = ptr to where to put decrypted blocks |
914 |
* |
915 |
* Return: # bits ciphered (>= 0) |
916 |
* else error code (e.g., BAD_CIPHER_STATE, BAD_KEY_MATERIAL) |
917 |
* |
918 |
* Notes: The only supported block size for ECB/CBC modes is BLOCK_SIZE bits. |
919 |
* If inputLen is not a multiple of BLOCK_SIZE bits in those modes, |
920 |
* an error BAD_INPUT_LEN is returned. In CFB1 mode, all block |
921 |
* sizes can be supported. |
922 |
* |
923 |
-****************************************************************************/ |
924 |
static int blockDecrypt(cipherInstance *cipher, keyInstance *key,CONST BYTE *input, |
925 |
int inputLen, BYTE *outBuffer) |
926 |
{ |
927 |
int i,n; /* loop counters */ |
928 |
DWORD x[BLOCK_SIZE/32]; /* block being encrypted */ |
929 |
DWORD t0,t1; /* temp variables */ |
930 |
int rounds=key->numRounds; /* number of rounds */ |
931 |
BYTE bit,bit0,ctBit,carry; /* temps for CFB */ |
932 |
|
933 |
/* make local copies of things for faster access */ |
934 |
int mode = cipher->mode; |
935 |
DWORD sk[TOTAL_SUBKEYS]; |
936 |
DWORD IV[BLOCK_SIZE/32]; |
937 |
|
938 |
GetSboxKey; |
939 |
|
940 |
#if VALIDATE_PARMS |
941 |
if ((cipher == NULL) || (cipher->cipherSig != VALID_SIG)) |
942 |
return BAD_CIPHER_STATE; |
943 |
if ((key == NULL) || (key->keySig != VALID_SIG)) |
944 |
return BAD_KEY_INSTANCE; |
945 |
if ((rounds < 2) || (rounds > MAX_ROUNDS) || (rounds&1)) |
946 |
return BAD_KEY_INSTANCE; |
947 |
if ((cipher->mode != MODE_CFB1) && (inputLen % BLOCK_SIZE)) |
948 |
return BAD_INPUT_LEN; |
949 |
#if ALIGN32 |
950 |
if ( (((int)cipher) & 3) || (((int)key ) & 3) || |
951 |
(((int)input) & 3) || (((int)outBuffer) & 3)) |
952 |
return BAD_ALIGN32; |
953 |
#endif |
954 |
#endif |
955 |
|
956 |
if (cipher->mode == MODE_CFB1) |
957 |
{ /* use blockEncrypt here to handle CFB, one block at a time */ |
958 |
cipher->mode = MODE_ECB; /* do encryption in ECB */ |
959 |
for (n=0;n<inputLen;n++) |
960 |
{ |
961 |
blockEncrypt(cipher,key,cipher->IV,BLOCK_SIZE,(BYTE *)x); |
962 |
bit0 = 0x80 >> (n & 7); |
963 |
ctBit = input[n/8] & bit0; |
964 |
outBuffer[n/8] = (outBuffer[n/8] & ~ bit0) | |
965 |
(ctBit ^ ((((BYTE *) x)[0] & 0x80) >> (n&7))); |
966 |
carry = ctBit >> (7 - (n&7)); |
967 |
for (i=BLOCK_SIZE/8-1;i>=0;i--) |
968 |
{ |
969 |
bit = cipher->IV[i] >> 7; /* save next "carry" from shift */ |
970 |
cipher->IV[i] = (cipher->IV[i] << 1) ^ carry; |
971 |
carry = bit; |
972 |
} |
973 |
} |
974 |
cipher->mode = MODE_CFB1; /* restore mode for next time */ |
975 |
return inputLen; |
976 |
} |
977 |
|
978 |
/* here for ECB, CBC modes */ |
979 |
if (key->direction != DIR_DECRYPT) |
980 |
ReverseRoundSubkeys(key,DIR_DECRYPT); /* reverse the round subkey order */ |
981 |
#ifdef USE_ASM |
982 |
if ((useAsm & 2) && (inputLen)) |
983 |
#ifdef COMPILE_KEY |
984 |
if (key->keySig == VALID_SIG) |
985 |
return ((CipherProc *)(key->decryptFuncPtr))(cipher,key,input,inputLen,outBuffer); |
986 |
#else |
987 |
return (*blockDecrypt_86)(cipher,key,input,inputLen,outBuffer); |
988 |
#endif |
989 |
#endif |
990 |
/* make local copy of subkeys for speed */ |
991 |
memcpy(sk,key->subKeys,sizeof(DWORD)*(ROUND_SUBKEYS+2*rounds)); |
992 |
if (mode == MODE_CBC) |
993 |
BlockCopy(IV,cipher->iv32) |
994 |
else |
995 |
IV[0]=IV[1]=IV[2]=IV[3]=0; |
996 |
|
997 |
for (n=0;n<inputLen;n+=BLOCK_SIZE,input+=BLOCK_SIZE/8,outBuffer+=BLOCK_SIZE/8) |
998 |
{ |
999 |
#define LoadBlockD(N) x[N^2]=Bswap(((DWORD *)input)[N]) ^ sk[OUTPUT_WHITEN+N] |
1000 |
LoadBlockD(0); LoadBlockD(1); LoadBlockD(2); LoadBlockD(3); |
1001 |
|
1002 |
#define DecryptRound(K,R,id) \ |
1003 |
t0 = Fe32##id(x[K ],0); \ |
1004 |
t1 = Fe32##id(x[K^1],3); \ |
1005 |
x[K^2] = ROL (x[K^2],1); \ |
1006 |
x[K^2]^= t0 + t1 + sk[ROUND_SUBKEYS+2*(R) ]; \ |
1007 |
x[K^3]^= t0 + 2*t1 + sk[ROUND_SUBKEYS+2*(R)+1]; \ |
1008 |
x[K^3] = ROR (x[K^3],1); |
1009 |
|
1010 |
#define Decrypt2(R,id) { DecryptRound(2,R+1,id); DecryptRound(0,R,id); } |
1011 |
|
1012 |
#if defined(ZERO_KEY) |
1013 |
switch (key->keyLen) |
1014 |
{ |
1015 |
case 128: |
1016 |
for (i=rounds-2;i>=0;i-=2) |
1017 |
Decrypt2(i,_128); |
1018 |
break; |
1019 |
case 192: |
1020 |
for (i=rounds-2;i>=0;i-=2) |
1021 |
Decrypt2(i,_192); |
1022 |
break; |
1023 |
case 256: |
1024 |
for (i=rounds-2;i>=0;i-=2) |
1025 |
Decrypt2(i,_256); |
1026 |
break; |
1027 |
} |
1028 |
#else |
1029 |
{ |
1030 |
Decrypt2(14,_); |
1031 |
Decrypt2(12,_); |
1032 |
Decrypt2(10,_); |
1033 |
Decrypt2( 8,_); |
1034 |
Decrypt2( 6,_); |
1035 |
Decrypt2( 4,_); |
1036 |
Decrypt2( 2,_); |
1037 |
Decrypt2( 0,_); |
1038 |
} |
1039 |
#endif |
1040 |
if (cipher->mode == MODE_ECB) |
1041 |
{ |
1042 |
#if LittleEndian |
1043 |
#define StoreBlockD(N) ((DWORD *)outBuffer)[N] = x[N] ^ sk[INPUT_WHITEN+N] |
1044 |
#else |
1045 |
#define StoreBlockD(N) { t0=x[N]^sk[INPUT_WHITEN+N]; ((DWORD *)outBuffer)[N] = Bswap(t0); } |
1046 |
#endif |
1047 |
StoreBlockD(0); StoreBlockD(1); StoreBlockD(2); StoreBlockD(3); |
1048 |
#undef StoreBlockD |
1049 |
continue; |
1050 |
} |
1051 |
else |
1052 |
{ |
1053 |
#define StoreBlockD(N) x[N] ^= sk[INPUT_WHITEN+N] ^ IV[N]; \ |
1054 |
IV[N] = Bswap(((DWORD *)input)[N]); \ |
1055 |
((DWORD *)outBuffer)[N] = Bswap(x[N]); |
1056 |
StoreBlockD(0); StoreBlockD(1); StoreBlockD(2); StoreBlockD(3); |
1057 |
#undef StoreBlockD |
1058 |
} |
1059 |
} |
1060 |
if (mode == MODE_CBC) /* restore iv32 to cipher */ |
1061 |
BlockCopy(cipher->iv32,IV) |
1062 |
|
1063 |
return inputLen; |
1064 |
} |
1065 |
|
1066 |
#ifdef GetCodeSize
/*
+*****************************************************************************
*
* Function Name:	TwofishCodeSize
*
* Function:			Report the size of the Twofish code
*
* Arguments:		None
*
* Return:			If USE_ASM is defined and either of the low two bits of
*					useAsm is set: the value of TwofishAsmCodeSize().
*					Otherwise: Here(0) - TwofishCodeStart().
*
* Notes:	Only compiled when GetCodeSize is defined.  Here() and
*			TwofishCodeStart() are defined elsewhere; presumably they yield
*			code addresses bracketing the C implementation -- confirm
*			against their definitions.
*
-****************************************************************************/
static DWORD TwofishCodeSize(void)
	{
	DWORD here = Here(0);		/* sampled first, as in the original ordering */
#ifdef USE_ASM
	if (useAsm & 3)				/* ASM encrypt and/or decrypt in use */
		return TwofishAsmCodeSize();
#endif
	return here - TwofishCodeStart();
	}
#endif