/***************************************************************************
    TWOFISH2.C  -- Optimized C API calls for TWOFISH AES submission

    Submitters:
        Bruce Schneier, Counterpane Systems
        Doug Whiting,   Hi/fn
        John Kelsey,    Counterpane Systems
        Chris Hall,     Counterpane Systems
        David Wagner,   UC Berkeley

    Code Author:        Doug Whiting,   Hi/fn

    Version  1.00       April 1998

    Copyright 1998, Hi/fn and Counterpane Systems.  All rights reserved.

    Notes:
        *   Optimized version
        *   Tab size is set to 4 characters in this file

***************************************************************************/
22 |
#include "aes.h" |
23 |
#include "table.h" |
24 |
|
25 |
#include <memory.h> |
26 |
/*#include <assert.h>*/ |
27 |
|
/* Map a lower-case keying-mode selector onto its upper-case equivalent.
   The chain stops at the first branch whose condition holds, so at most
   one of MIN_KEY / PART_KEY / ZERO_KEY is defined here. */
#if   defined(min_key)  && !defined(MIN_KEY)
#define MIN_KEY     1           /* toupper() */
#elif defined(part_key) && !defined(PART_KEY)
#define PART_KEY    1
#elif defined(zero_key) && !defined(ZERO_KEY)
#define ZERO_KEY    1
#endif

#ifdef USE_ASM
/* Bit mask tested in this file: (useAsm & 4) makes reKey() call reKey_86,
   (useAsm & 1) makes blockEncrypt() take the assembler encryption path. */
extern int useAsm;              /* ok to use ASM code? */

/* Signature shared by the assembler block encrypt/decrypt routines. */
typedef int cdecl CipherProc
   (cipherInstance *cipher, keyInstance *key,BYTE *input,int inputLen,BYTE *outBuffer);
/* Signature of the assembler key-schedule routine. */
typedef int cdecl KeySetupProc(keyInstance *key);

extern CipherProc   *blockEncrypt_86;   /* ptr to ASM functions */
extern CipherProc   *blockDecrypt_86;
extern KeySetupProc *reKey_86;
extern DWORD        cdecl TwofishAsmCodeSize(void);
#endif

50 |
/* |
51 |
+***************************************************************************** |
52 |
* Constants/Macros/Tables |
53 |
-****************************************************************************/ |
54 |
|
55 |
#define CONST /* help syntax from C++, NOP here */ |
56 |
|
57 |
static CONST fullSbox MDStab; /* not actually const. Initialized ONE time */ |
58 |
static int needToBuildMDS=1; /* is MDStab initialized yet? */ |
59 |
|
60 |
#define BIG_TAB 0 |
61 |
|
62 |
#if BIG_TAB |
63 |
static BYTE bigTab[4][256][256]; /* pre-computed S-box */ |
64 |
#endif |
65 |
|
66 |
/* number of rounds for various key sizes: 128, 192, 256 */ |
67 |
/* (ignored for now in optimized code!) */ |
68 |
static CONST int numRounds[4]= {0,ROUNDS_128,ROUNDS_192,ROUNDS_256}; |
69 |
|
70 |
#if REENTRANT |
71 |
#define _sBox_ key->sBox8x32 |
72 |
#else |
73 |
static fullSbox _sBox_; /* permuted MDStab based on keys */ |
74 |
#endif |
75 |
#define _sBox8_(N) (((BYTE *) _sBox_) + (N)*256) |
76 |
|
/*------- see what level of S-box precomputation we need to do -----*/
/* Each keying mode below defines MOD_STRING, the Fe32_* lookup macro(s) used
   by the round function, and GetSboxKey (what a cipher routine must declare
   locally before using Fe32_*).  All modes except ZERO_KEY also define
   sbSet(), used by reKey() to store one precomputed S-box entry. */
#if   defined(ZERO_KEY)
/* No per-key tables: every lookup walks the whole p8() chain, xoring in the
   bytes of SKEY[] (a local copy of key->sboxKeys made by GetSboxKey). */
#define MOD_STRING  "(Zero S-box keying)"
#define Fe32_128(x,R)   \
    (   MDStab[0][p8(01)[p8(02)[_b(x,R  )]^b0(SKEY[1])]^b0(SKEY[0])] ^  \
        MDStab[1][p8(11)[p8(12)[_b(x,R+1)]^b1(SKEY[1])]^b1(SKEY[0])] ^  \
        MDStab[2][p8(21)[p8(22)[_b(x,R+2)]^b2(SKEY[1])]^b2(SKEY[0])] ^  \
        MDStab[3][p8(31)[p8(32)[_b(x,R+3)]^b3(SKEY[1])]^b3(SKEY[0])] )
#define Fe32_192(x,R)   \
    (   MDStab[0][p8(01)[p8(02)[p8(03)[_b(x,R  )]^b0(SKEY[2])]^b0(SKEY[1])]^b0(SKEY[0])] ^ \
        MDStab[1][p8(11)[p8(12)[p8(13)[_b(x,R+1)]^b1(SKEY[2])]^b1(SKEY[1])]^b1(SKEY[0])] ^ \
        MDStab[2][p8(21)[p8(22)[p8(23)[_b(x,R+2)]^b2(SKEY[2])]^b2(SKEY[1])]^b2(SKEY[0])] ^ \
        MDStab[3][p8(31)[p8(32)[p8(33)[_b(x,R+3)]^b3(SKEY[2])]^b3(SKEY[1])]^b3(SKEY[0])] )
#define Fe32_256(x,R)   \
    (   MDStab[0][p8(01)[p8(02)[p8(03)[p8(04)[_b(x,R  )]^b0(SKEY[3])]^b0(SKEY[2])]^b0(SKEY[1])]^b0(SKEY[0])] ^ \
        MDStab[1][p8(11)[p8(12)[p8(13)[p8(14)[_b(x,R+1)]^b1(SKEY[3])]^b1(SKEY[2])]^b1(SKEY[1])]^b1(SKEY[0])] ^ \
        MDStab[2][p8(21)[p8(22)[p8(23)[p8(24)[_b(x,R+2)]^b2(SKEY[3])]^b2(SKEY[2])]^b2(SKEY[1])]^b2(SKEY[0])] ^ \
        MDStab[3][p8(31)[p8(32)[p8(33)[p8(34)[_b(x,R+3)]^b3(SKEY[3])]^b3(SKEY[2])]^b3(SKEY[1])]^b3(SKEY[0])] )

#define GetSboxKey  DWORD SKEY[4];  /* local copy */ \
                    memcpy(SKEY,key->sboxKeys,sizeof(SKEY));
/*----------------------------------------------------------------*/
#elif defined(MIN_KEY)
/* One byte table per S-box (_sBox8_); the final p8() stage and the xor with
   the bytes of SKEY0 (= key->sboxKeys[0]) are done at lookup time. */
#define MOD_STRING  "(Minimal keying)"
#define Fe32_(x,R)(MDStab[0][p8(01)[_sBox8_(0)[_b(x,R  )]] ^ b0(SKEY0)] ^ \
                   MDStab[1][p8(11)[_sBox8_(1)[_b(x,R+1)]] ^ b1(SKEY0)] ^ \
                   MDStab[2][p8(21)[_sBox8_(2)[_b(x,R+2)]] ^ b2(SKEY0)] ^ \
                   MDStab[3][p8(31)[_sBox8_(3)[_b(x,R+3)]] ^ b3(SKEY0)])
#define sbSet(N,i,J,v) { _sBox8_(N)[i+J] = v; }
#define GetSboxKey  DWORD SKEY0 = key->sboxKeys[0]      /* local copy */
/*----------------------------------------------------------------*/
#elif defined(PART_KEY)
/* _sBox8_ holds the byte S-box output; only the MDStab lookup remains. */
#define MOD_STRING  "(Partial keying)"
#define Fe32_(x,R)(MDStab[0][_sBox8_(0)[_b(x,R  )]] ^ \
                   MDStab[1][_sBox8_(1)[_b(x,R+1)]] ^ \
                   MDStab[2][_sBox8_(2)[_b(x,R+2)]] ^ \
                   MDStab[3][_sBox8_(3)[_b(x,R+3)]])
#define sbSet(N,i,J,v) { _sBox8_(N)[i+J] = v; }
#define GetSboxKey
/*----------------------------------------------------------------*/
#else   /* default is FULL_KEY */
#ifndef FULL_KEY
#define FULL_KEY    1
#endif
#if BIG_TAB
#define TAB_STR     " (Big table)"
#else
#define TAB_STR
#endif
#ifdef COMPILE_KEY
#define MOD_STRING  "(Compiled subkeys)" TAB_STR
#else
#define MOD_STRING  "(Full keying)" TAB_STR
#endif
/* Fe32_ does a full S-box + MDS lookup.  Need to #define _sBox_ before use.
   Note that we "interleave" 0,1, and 2,3 to avoid cache bank collisions
   in optimized assembly language.
*/
#define Fe32_(x,R) (_sBox_[0][2*_b(x,R  )] ^ _sBox_[0][2*_b(x,R+1)+1] ^ \
                    _sBox_[2][2*_b(x,R+2)] ^ _sBox_[2][2*_b(x,R+3)+1])
        /* set a single S-box value, given the input byte */
/* Array-index form of the same store, kept for reference:
   #define sbSet(N,i,J,v) { _sBox_[N&2][2*i+(N&1)+2*J]=MDStab[N][v]; } */
#define sbSet(N,i,J,v) { *((DWORD *)_sBox_ + (N&2)*256 + 2*i + (N&1) + 2*J) = MDStab[N][v]; }
#define GetSboxKey
#endif

/* macro(s) for debugging help */
#define     CHECK_TABLE     0       /* nonzero --> compare against "slow" table */
#define     VALIDATE_PARMS  0       /* disable for full speed */

/* end of debug macros */

#ifdef GetCodeSize
/* A declaration may carry at most one storage-class specifier, so the
   former "static extern" is reduced to "extern".
   NOTE(review): Here() is not defined in the visible part of this file; it
   is presumably supplied by another translation unit -- confirm. */
extern DWORD Here(DWORD x);         /* return caller's address! */
static DWORD TwofishCodeStart(void) { return Here(0); }
#endif

154 |
/* |
155 |
+***************************************************************************** |
156 |
* |
157 |
* Function Name: TableOp |
158 |
* |
159 |
* Function: Handle table use checking |
160 |
* |
161 |
* Arguments: op = what to do (see TAB_* defns in AES.H) |
162 |
* |
163 |
* Return: TRUE --> done (for TAB_QUERY) |
164 |
* |
165 |
* Notes: This routine is for use in generating the tables KAT file. |
166 |
* For this optimized version, we don't actually track table usage, |
167 |
* since it would make the macros incredibly ugly. Instead we just |
168 |
* run for a fixed number of queries and then say we're done. |
169 |
* |
170 |
-****************************************************************************/ |
171 |
static int TableOp(int op) |
172 |
{ |
173 |
static int queryCnt=0; |
174 |
|
175 |
switch (op) |
176 |
{ |
177 |
case TAB_DISABLE: |
178 |
break; |
179 |
case TAB_ENABLE: |
180 |
break; |
181 |
case TAB_RESET: |
182 |
queryCnt=0; |
183 |
break; |
184 |
case TAB_QUERY: |
185 |
queryCnt++; |
186 |
if (queryCnt < TAB_MIN_QUERY) |
187 |
return FALSE; |
188 |
} |
189 |
return TRUE; |
190 |
} |
191 |
|
192 |
|
#if CHECK_TABLE
/*
+*****************************************************************************
*
* Function Name:    f32
*
* Function:         Run four bytes through keyed S-boxes and apply MDS matrix
*
* Arguments:        x       =   input to f function
*                   k32     =   pointer to key dwords
*                   keyLen  =   total key length (k32 --> keyLen/2 bits)
*
* Return:           The output of the keyed permutation applied to x.
*
* Notes:
*   This function is a keyed 32-bit permutation.  It is the major building
*   block for the Twofish round function, including the four keyed 8x8
*   permutations and the 4x4 MDS matrix multiply.  This function is used
*   both for generating round subkeys and within the round function on the
*   block being encrypted.
*
*   This version is fairly slow and pedagogical, although a smartcard would
*   probably perform the operation exactly this way in firmware.  For
*   ultimate performance, the entire operation can be completed with four
*   lookups into four 256x32-bit tables, with three dword xors.
*
*   The MDS matrix is defined in TABLE.H.  To multiply by Mij, just use the
*   macro Mij(x).
*
*   Only compiled when CHECK_TABLE is nonzero; reKey() then uses it to
*   cross-check the optimized tables.
*
-****************************************************************************/
static DWORD f32(DWORD x,CONST DWORD *k32,int keyLen)
{
    BYTE  b[4];

    /* Run each byte thru 8x8 S-boxes, xoring with key byte at each stage. */
    /* Note that each byte goes through a different combination of S-boxes.*/

    /* NOTE(review): stores a DWORD through a BYTE array; this assumes DWORD
       is exactly 4 bytes and that the cast is tolerated -- confirm. */
    *((DWORD *)b) = Bswap(x);   /* make b[0] = LSB, b[3] = MSB */
    switch (((keyLen + 63)/64) & 3)
    {
        case 0:     /* 256 bits of key */
            b[0] = p8(04)[b[0]] ^ b0(k32[3]);
            b[1] = p8(14)[b[1]] ^ b1(k32[3]);
            b[2] = p8(24)[b[2]] ^ b2(k32[3]);
            b[3] = p8(34)[b[3]] ^ b3(k32[3]);
            /* fall thru, having pre-processed b[0]..b[3] with k32[3] */
        case 3:     /* 192 bits of key */
            b[0] = p8(03)[b[0]] ^ b0(k32[2]);
            b[1] = p8(13)[b[1]] ^ b1(k32[2]);
            b[2] = p8(23)[b[2]] ^ b2(k32[2]);
            b[3] = p8(33)[b[3]] ^ b3(k32[2]);
            /* fall thru, having pre-processed b[0]..b[3] with k32[2] */
        case 2:     /* 128 bits of key */
            b[0] = p8(00)[p8(01)[p8(02)[b[0]] ^ b0(k32[1])] ^ b0(k32[0])];
            b[1] = p8(10)[p8(11)[p8(12)[b[1]] ^ b1(k32[1])] ^ b1(k32[0])];
            b[2] = p8(20)[p8(21)[p8(22)[b[2]] ^ b2(k32[1])] ^ b2(k32[0])];
            b[3] = p8(30)[p8(31)[p8(32)[b[3]] ^ b3(k32[1])] ^ b3(k32[0])];
    }

    /* Now perform the MDS matrix multiply inline. */
    return  ((M00(b[0]) ^ M01(b[1]) ^ M02(b[2]) ^ M03(b[3]))      ) ^
            ((M10(b[0]) ^ M11(b[1]) ^ M12(b[2]) ^ M13(b[3])) <<  8) ^
            ((M20(b[0]) ^ M21(b[1]) ^ M22(b[2]) ^ M23(b[3])) << 16) ^
            ((M30(b[0]) ^ M31(b[1]) ^ M32(b[2]) ^ M33(b[3])) << 24) ;
}
#endif  /* CHECK_TABLE */


261 |
/* |
262 |
+***************************************************************************** |
263 |
* |
264 |
* Function Name: RS_MDS_encode |
265 |
* |
266 |
* Function: Use (12,8) Reed-Solomon code over GF(256) to produce |
267 |
* a key S-box dword from two key material dwords. |
268 |
* |
269 |
* Arguments: k0 = 1st dword |
270 |
* k1 = 2nd dword |
271 |
* |
272 |
* Return: Remainder polynomial generated using RS code |
273 |
* |
274 |
* Notes: |
275 |
* Since this computation is done only once per reKey per 64 bits of key, |
276 |
* the performance impact of this routine is imperceptible. The RS code |
277 |
* chosen has "simple" coefficients to allow smartcard/hardware implementation |
278 |
* without lookup tables. |
279 |
* |
280 |
-****************************************************************************/ |
281 |
static DWORD RS_MDS_Encode(DWORD k0,DWORD k1) |
282 |
{ |
283 |
int i,j; |
284 |
DWORD r; |
285 |
|
286 |
for (i=r=0;i<2;i++) |
287 |
{ |
288 |
r ^= (i) ? k0 : k1; /* merge in 32 more key bits */ |
289 |
for (j=0;j<4;j++) /* shift one byte at a time */ |
290 |
RS_rem(r); |
291 |
} |
292 |
return r; |
293 |
} |
294 |
|
295 |
|
296 |
/* |
297 |
+***************************************************************************** |
298 |
* |
299 |
* Function Name: BuildMDS |
300 |
* |
301 |
* Function: Initialize the MDStab array |
302 |
* |
303 |
* Arguments: None. |
304 |
* |
305 |
* Return: None. |
306 |
* |
307 |
* Notes: |
308 |
* Here we precompute all the fixed MDS table. This only needs to be done |
309 |
* one time at initialization, after which the table is "CONST". |
310 |
* |
311 |
-****************************************************************************/ |
312 |
static void BuildMDS(void) |
313 |
{ |
314 |
int i; |
315 |
DWORD d; |
316 |
BYTE m1[2],mX[2],mY[4]; |
317 |
|
318 |
for (i=0;i<256;i++) |
319 |
{ |
320 |
m1[0]=P8x8[0][i]; /* compute all the matrix elements */ |
321 |
mX[0]=(BYTE) Mul_X(m1[0]); |
322 |
mY[0]=(BYTE) Mul_Y(m1[0]); |
323 |
|
324 |
m1[1]=P8x8[1][i]; |
325 |
mX[1]=(BYTE) Mul_X(m1[1]); |
326 |
mY[1]=(BYTE) Mul_Y(m1[1]); |
327 |
|
328 |
#undef Mul_1 /* change what the pre-processor does with Mij */ |
329 |
#undef Mul_X |
330 |
#undef Mul_Y |
331 |
#define Mul_1 m1 /* It will now access m01[], m5B[], and mEF[] */ |
332 |
#define Mul_X mX |
333 |
#define Mul_Y mY |
334 |
|
335 |
#define SetMDS(N) \ |
336 |
b0(d) = M0##N[P_##N##0]; \ |
337 |
b1(d) = M1##N[P_##N##0]; \ |
338 |
b2(d) = M2##N[P_##N##0]; \ |
339 |
b3(d) = M3##N[P_##N##0]; \ |
340 |
MDStab[N][i] = d; |
341 |
|
342 |
SetMDS(0); /* fill in the matrix with elements computed above */ |
343 |
SetMDS(1); |
344 |
SetMDS(2); |
345 |
SetMDS(3); |
346 |
} |
347 |
#undef Mul_1 |
348 |
#undef Mul_X |
349 |
#undef Mul_Y |
350 |
#define Mul_1 Mx_1 /* re-enable true multiply */ |
351 |
#define Mul_X Mx_X |
352 |
#define Mul_Y Mx_Y |
353 |
|
354 |
#if BIG_TAB |
355 |
{ |
356 |
int j,k; |
357 |
BYTE *q0,*q1; |
358 |
|
359 |
for (i=0;i<4;i++) |
360 |
{ |
361 |
switch (i) |
362 |
{ |
363 |
case 0: q0=p8(01); q1=p8(02); break; |
364 |
case 1: q0=p8(11); q1=p8(12); break; |
365 |
case 2: q0=p8(21); q1=p8(22); break; |
366 |
case 3: q0=p8(31); q1=p8(32); break; |
367 |
} |
368 |
for (j=0;j<256;j++) |
369 |
for (k=0;k<256;k++) |
370 |
bigTab[i][j][k]=q0[q1[k]^j]; |
371 |
} |
372 |
} |
373 |
#endif |
374 |
|
375 |
needToBuildMDS=0; /* NEVER modify the table again! */ |
376 |
} |
377 |
|
378 |
/* |
379 |
+***************************************************************************** |
380 |
* |
381 |
* Function Name: ReverseRoundSubkeys |
382 |
* |
383 |
* Function: Reverse order of round subkeys to switch between encrypt/decrypt |
384 |
* |
385 |
* Arguments: key = ptr to keyInstance to be reversed |
386 |
* newDir = new direction value |
387 |
* |
388 |
* Return: None. |
389 |
* |
390 |
* Notes: |
391 |
* This optimization allows both blockEncrypt and blockDecrypt to use the same |
392 |
* "fallthru" switch statement based on the number of rounds. |
393 |
* Note that key->numRounds must be even and >= 2 here. |
394 |
* |
395 |
-****************************************************************************/ |
396 |
static void ReverseRoundSubkeys(keyInstance *key,BYTE newDir) |
397 |
{ |
398 |
DWORD t0,t1; |
399 |
register DWORD *r0=key->subKeys+ROUND_SUBKEYS; |
400 |
register DWORD *r1=r0 + 2*key->numRounds - 2; |
401 |
|
402 |
for (;r0 < r1;r0+=2,r1-=2) |
403 |
{ |
404 |
t0=r0[0]; /* swap the order */ |
405 |
t1=r0[1]; |
406 |
r0[0]=r1[0]; /* but keep relative order within pairs */ |
407 |
r0[1]=r1[1]; |
408 |
r1[0]=t0; |
409 |
r1[1]=t1; |
410 |
} |
411 |
|
412 |
key->direction=newDir; |
413 |
} |
414 |
|
415 |
/* |
416 |
+***************************************************************************** |
417 |
* |
418 |
* Function Name: Xor256 |
419 |
* |
420 |
* Function: Copy an 8-bit permutation (256 bytes), xoring with a byte |
421 |
* |
422 |
* Arguments: dst = where to put result |
423 |
* src = where to get data (can be same asa dst) |
424 |
* b = byte to xor |
425 |
* |
426 |
* Return: None |
427 |
* |
428 |
* Notes: |
429 |
* BorlandC's optimization is terrible! When we put the code inline, |
430 |
* it generates fairly good code in the *following* segment (not in the Xor256 |
431 |
* code itself). If the call is made, the code following the call is awful! |
432 |
* The penalty is nearly 50%! So we take the code size hit for inlining for |
433 |
* Borland, while Microsoft happily works with a call. |
434 |
* |
435 |
-****************************************************************************/ |
436 |
#if defined(__BORLANDC__) /* do it inline */ |
437 |
#define Xor32(dst,src,i) { ((DWORD *)dst)[i] = ((DWORD *)src)[i] ^ tmpX; } |
438 |
#define Xor256(dst,src,b) \ |
439 |
{ \ |
440 |
register DWORD tmpX=0x01010101u * b;\ |
441 |
for (i=0;i<64;i+=4) \ |
442 |
{ Xor32(dst,src,i ); Xor32(dst,src,i+1); Xor32(dst,src,i+2); Xor32(dst,src,i+3); } \ |
443 |
} |
444 |
#else /* do it as a function call */ |
445 |
static void Xor256(void *dst,void *src,BYTE b) |
446 |
{ |
447 |
register DWORD x=b*0x01010101u; /* replicate byte to all four bytes */ |
448 |
register DWORD *d=(DWORD *)dst; |
449 |
register DWORD *s=(DWORD *)src; |
450 |
#define X_8(N) { d[N]=s[N] ^ x; d[N+1]=s[N+1] ^ x; } |
451 |
#define X_32(N) { X_8(N); X_8(N+2); X_8(N+4); X_8(N+6); } |
452 |
X_32(0 ); X_32( 8); X_32(16); X_32(24); /* all inline */ |
453 |
d+=32; /* keep offsets small! */ |
454 |
s+=32; |
455 |
X_32(0 ); X_32( 8); X_32(16); X_32(24); /* all inline */ |
456 |
} |
457 |
#endif |
458 |
|
459 |
/* |
460 |
+***************************************************************************** |
461 |
* |
462 |
* Function Name: reKey |
463 |
* |
464 |
* Function: Initialize the Twofish key schedule from key32 |
465 |
* |
466 |
* Arguments: key = ptr to keyInstance to be initialized |
467 |
* |
468 |
* Return: TRUE on success |
469 |
* |
470 |
* Notes: |
471 |
* Here we precompute all the round subkeys, although that is not actually |
472 |
* required. For example, on a smartcard, the round subkeys can |
473 |
* be generated on-the-fly using f32() |
474 |
* |
475 |
-****************************************************************************/ |
476 |
static int reKey(keyInstance *key) |
477 |
{ |
478 |
int i,j,k64Cnt,keyLen; |
479 |
int subkeyCnt; |
480 |
DWORD A=0,B=0,q; |
481 |
DWORD sKey[MAX_KEY_BITS/64],k32e[MAX_KEY_BITS/64],k32o[MAX_KEY_BITS/64]; |
482 |
BYTE L0[256],L1[256]; /* small local 8-bit permutations */ |
483 |
|
484 |
#if VALIDATE_PARMS |
485 |
#if ALIGN32 |
486 |
if (((int)key) & 3) |
487 |
return BAD_ALIGN32; |
488 |
if ((key->keyLen % 64) || (key->keyLen < MIN_KEY_BITS)) |
489 |
return BAD_KEY_INSTANCE; |
490 |
#endif |
491 |
#endif |
492 |
|
493 |
if (needToBuildMDS) /* do this one time only */ |
494 |
BuildMDS(); |
495 |
|
496 |
#define F32(res,x,k32) \ |
497 |
{ \ |
498 |
DWORD t=x; \ |
499 |
switch (k64Cnt & 3) \ |
500 |
{ \ |
501 |
case 0: /* same as 4 */ \ |
502 |
b0(t) = p8(04)[b0(t)] ^ b0(k32[3]); \ |
503 |
b1(t) = p8(14)[b1(t)] ^ b1(k32[3]); \ |
504 |
b2(t) = p8(24)[b2(t)] ^ b2(k32[3]); \ |
505 |
b3(t) = p8(34)[b3(t)] ^ b3(k32[3]); \ |
506 |
/* fall thru, having pre-processed t */ \ |
507 |
case 3: b0(t) = p8(03)[b0(t)] ^ b0(k32[2]); \ |
508 |
b1(t) = p8(13)[b1(t)] ^ b1(k32[2]); \ |
509 |
b2(t) = p8(23)[b2(t)] ^ b2(k32[2]); \ |
510 |
b3(t) = p8(33)[b3(t)] ^ b3(k32[2]); \ |
511 |
/* fall thru, having pre-processed t */ \ |
512 |
case 2: /* 128-bit keys (optimize for this case) */ \ |
513 |
res= MDStab[0][p8(01)[p8(02)[b0(t)] ^ b0(k32[1])] ^ b0(k32[0])] ^ \ |
514 |
MDStab[1][p8(11)[p8(12)[b1(t)] ^ b1(k32[1])] ^ b1(k32[0])] ^ \ |
515 |
MDStab[2][p8(21)[p8(22)[b2(t)] ^ b2(k32[1])] ^ b2(k32[0])] ^ \ |
516 |
MDStab[3][p8(31)[p8(32)[b3(t)] ^ b3(k32[1])] ^ b3(k32[0])] ; \ |
517 |
} \ |
518 |
} |
519 |
|
520 |
|
521 |
#if !CHECK_TABLE |
522 |
#if defined(USE_ASM) /* only do this if not using assember */ |
523 |
if (!(useAsm & 4)) |
524 |
#endif |
525 |
#endif |
526 |
{ |
527 |
subkeyCnt = ROUND_SUBKEYS + 2*key->numRounds; |
528 |
keyLen=key->keyLen; |
529 |
k64Cnt=(keyLen+63)/64; /* number of 64-bit key words */ |
530 |
for (i=0,j=k64Cnt-1;i<k64Cnt;i++,j--) |
531 |
{ /* split into even/odd key dwords */ |
532 |
k32e[i]=key->key32[2*i ]; |
533 |
k32o[i]=key->key32[2*i+1]; |
534 |
/* compute S-box keys using (12,8) Reed-Solomon code over GF(256) */ |
535 |
sKey[j]=key->sboxKeys[j]=RS_MDS_Encode(k32e[i],k32o[i]); /* reverse order */ |
536 |
} |
537 |
} |
538 |
|
539 |
#ifdef USE_ASM |
540 |
if (useAsm & 4) |
541 |
{ |
542 |
#if defined(COMPILE_KEY) && defined(USE_ASM) |
543 |
key->keySig = VALID_SIG; /* show that we are initialized */ |
544 |
key->codeSize = sizeof(key->compiledCode); /* set size */ |
545 |
#endif |
546 |
reKey_86(key); |
547 |
} |
548 |
else |
549 |
#endif |
550 |
{ |
551 |
for (i=q=0;i<subkeyCnt/2;i++,q+=SK_STEP) |
552 |
{ /* compute round subkeys for PHT */ |
553 |
F32(A,q ,k32e); /* A uses even key dwords */ |
554 |
F32(B,q+SK_BUMP,k32o); /* B uses odd key dwords */ |
555 |
B = ROL(B,8); |
556 |
key->subKeys[2*i ] = A+B; /* combine with a PHT */ |
557 |
B = A + 2*B; |
558 |
key->subKeys[2*i+1] = ROL(B,SK_ROTL); |
559 |
} |
560 |
#if !defined(ZERO_KEY) |
561 |
switch (keyLen) /* case out key length for speed in generating S-boxes */ |
562 |
{ |
563 |
case 128: |
564 |
#if defined(FULL_KEY) || defined(PART_KEY) |
565 |
#if BIG_TAB |
566 |
#define one128(N,J) sbSet(N,i,J,L0[i+J]) |
567 |
#define sb128(N) { \ |
568 |
BYTE *qq=bigTab[N][b##N(sKey[1])]; \ |
569 |
Xor256(L0,qq,b##N(sKey[0])); \ |
570 |
for (i=0;i<256;i+=2) { one128(N,0); one128(N,1); } } |
571 |
#else |
572 |
#define one128(N,J) sbSet(N,i,J,p8(N##1)[L0[i+J]]^k0) |
573 |
#define sb128(N) { \ |
574 |
Xor256(L0,p8(N##2),b##N(sKey[1])); \ |
575 |
{ register DWORD k0=b##N(sKey[0]); \ |
576 |
for (i=0;i<256;i+=2) { one128(N,0); one128(N,1); } } } |
577 |
#endif |
578 |
#elif defined(MIN_KEY) |
579 |
#define sb128(N) Xor256(_sBox8_(N),p8(N##2),b##N(sKey[1])) |
580 |
#endif |
581 |
sb128(0); sb128(1); sb128(2); sb128(3); |
582 |
break; |
583 |
case 192: |
584 |
#if defined(FULL_KEY) || defined(PART_KEY) |
585 |
#define one192(N,J) sbSet(N,i,J,p8(N##1)[p8(N##2)[L0[i+J]]^k1]^k0) |
586 |
#define sb192(N) { \ |
587 |
Xor256(L0,p8(N##3),b##N(sKey[2])); \ |
588 |
{ register DWORD k0=b##N(sKey[0]); \ |
589 |
register DWORD k1=b##N(sKey[1]); \ |
590 |
for (i=0;i<256;i+=2) { one192(N,0); one192(N,1); } } } |
591 |
#elif defined(MIN_KEY) |
592 |
#define one192(N,J) sbSet(N,i,J,p8(N##2)[L0[i+J]]^k1) |
593 |
#define sb192(N) { \ |
594 |
Xor256(L0,p8(N##3),b##N(sKey[2])); \ |
595 |
{ register DWORD k1=b##N(sKey[1]); \ |
596 |
for (i=0;i<256;i+=2) { one192(N,0); one192(N,1); } } } |
597 |
#endif |
598 |
sb192(0); sb192(1); sb192(2); sb192(3); |
599 |
break; |
600 |
case 256: |
601 |
#if defined(FULL_KEY) || defined(PART_KEY) |
602 |
#define one256(N,J) sbSet(N,i,J,p8(N##1)[p8(N##2)[L0[i+J]]^k1]^k0) |
603 |
#define sb256(N) { \ |
604 |
Xor256(L1,p8(N##4),b##N(sKey[3])); \ |
605 |
for (i=0;i<256;i+=2) {L0[i ]=p8(N##3)[L1[i]]; \ |
606 |
L0[i+1]=p8(N##3)[L1[i+1]]; } \ |
607 |
Xor256(L0,L0,b##N(sKey[2])); \ |
608 |
{ register DWORD k0=b##N(sKey[0]); \ |
609 |
register DWORD k1=b##N(sKey[1]); \ |
610 |
for (i=0;i<256;i+=2) { one256(N,0); one256(N,1); } } } |
611 |
#elif defined(MIN_KEY) |
612 |
#define one256(N,J) sbSet(N,i,J,p8(N##2)[L0[i+J]]^k1) |
613 |
#define sb256(N) { \ |
614 |
Xor256(L1,p8(N##4),b##N(sKey[3])); \ |
615 |
for (i=0;i<256;i+=2) {L0[i ]=p8(N##3)[L1[i]]; \ |
616 |
L0[i+1]=p8(N##3)[L1[i+1]]; } \ |
617 |
Xor256(L0,L0,b##N(sKey[2])); \ |
618 |
{ register DWORD k1=b##N(sKey[1]); \ |
619 |
for (i=0;i<256;i+=2) { one256(N,0); one256(N,1); } } } |
620 |
#endif |
621 |
sb256(0); sb256(1); sb256(2); sb256(3); |
622 |
break; |
623 |
} |
624 |
#endif |
625 |
} |
626 |
|
627 |
#if CHECK_TABLE /* sanity check vs. pedagogical code*/ |
628 |
{ |
629 |
GetSboxKey; |
630 |
for (i=0;i<subkeyCnt/2;i++) |
631 |
{ |
632 |
A = f32(i*SK_STEP ,k32e,keyLen); /* A uses even key dwords */ |
633 |
B = f32(i*SK_STEP+SK_BUMP,k32o,keyLen); /* B uses odd key dwords */ |
634 |
B = ROL(B,8); |
635 |
assert(key->subKeys[2*i ] == A+ B); |
636 |
assert(key->subKeys[2*i+1] == ROL(A+2*B,SK_ROTL)); |
637 |
} |
638 |
#if !defined(ZERO_KEY) /* any S-boxes to check? */ |
639 |
for (i=q=0;i<256;i++,q+=0x01010101) |
640 |
assert(f32(q,key->sboxKeys,keyLen) == Fe32_(q,0)); |
641 |
#endif |
642 |
} |
643 |
#endif /* CHECK_TABLE */ |
644 |
|
645 |
if (key->direction == DIR_ENCRYPT) |
646 |
ReverseRoundSubkeys(key,DIR_ENCRYPT); /* reverse the round subkey order */ |
647 |
|
648 |
return TRUE; |
649 |
} |
650 |
/* |
651 |
+***************************************************************************** |
652 |
* |
653 |
* Function Name: makeKey |
654 |
* |
655 |
* Function: Initialize the Twofish key schedule |
656 |
* |
657 |
* Arguments: key = ptr to keyInstance to be initialized |
658 |
* direction = DIR_ENCRYPT or DIR_DECRYPT |
659 |
* keyLen = # bits of key text at *keyMaterial |
660 |
* keyMaterial = ptr to hex ASCII chars representing key bits |
661 |
* |
662 |
* Return: TRUE on success |
663 |
* else error code (e.g., BAD_KEY_DIR) |
664 |
* |
665 |
* Notes: This parses the key bits from keyMaterial. Zeroes out unused key bits |
666 |
* |
667 |
-****************************************************************************/ |
668 |
static int makeKey(keyInstance *key, BYTE direction, int keyLen,CONST char *keyMaterial) |
669 |
{ |
670 |
int i; |
671 |
|
672 |
#if VALIDATE_PARMS /* first, sanity check on parameters */ |
673 |
if (key == NULL) |
674 |
return BAD_KEY_INSTANCE;/* must have a keyInstance to initialize */ |
675 |
if ((direction != DIR_ENCRYPT) && (direction != DIR_DECRYPT)) |
676 |
return BAD_KEY_DIR; /* must have valid direction */ |
677 |
if ((keyLen > MAX_KEY_BITS) || (keyLen < 8) || (keyLen & 0x3F)) |
678 |
return BAD_KEY_MAT; /* length must be valid */ |
679 |
key->keySig = VALID_SIG; /* show that we are initialized */ |
680 |
#if ALIGN32 |
681 |
if ((((int)key) & 3) || (((int)key->key32) & 3)) |
682 |
return BAD_ALIGN32; |
683 |
#endif |
684 |
#endif |
685 |
|
686 |
key->direction = direction;/* set our cipher direction */ |
687 |
key->keyLen = (keyLen+63) & ~63; /* round up to multiple of 64 */ |
688 |
key->numRounds = numRounds[(keyLen-1)/64]; |
689 |
memset(key->key32,0,sizeof(key->key32)); /* zero unused bits */ |
690 |
|
691 |
if (keyMaterial == NULL) |
692 |
return TRUE; /* allow a "dummy" call */ |
693 |
|
694 |
for (i=0;i<keyLen/32;i++) /* make byte-oriented copy for CFB1 */ |
695 |
key->key32[i] = (((unsigned char *)keyMaterial)[i*4+0] << 0) |
696 |
| (((unsigned char *)keyMaterial)[i*4+1] << 8) |
697 |
| (((unsigned char *)keyMaterial)[i*4+2] << 16) |
698 |
| (((unsigned char *)keyMaterial)[i*4+3] << 24); |
699 |
|
700 |
return reKey(key); /* generate round subkeys */ |
701 |
} |
702 |
|
703 |
|
704 |
/* |
705 |
+***************************************************************************** |
706 |
* |
707 |
* Function Name: cipherInit |
708 |
* |
709 |
* Function: Initialize the Twofish cipher in a given mode |
710 |
* |
711 |
* Arguments: cipher = ptr to cipherInstance to be initialized |
712 |
* mode = MODE_ECB, MODE_CBC, or MODE_CFB1 |
713 |
* IV = ptr to hex ASCII test representing IV bytes |
714 |
* |
715 |
* Return: TRUE on success |
716 |
* else error code (e.g., BAD_CIPHER_MODE) |
717 |
* |
718 |
-****************************************************************************/ |
719 |
static int cipherInit(cipherInstance *cipher, BYTE mode,CONST char *IV) |
720 |
{ |
721 |
int i; |
722 |
#if VALIDATE_PARMS /* first, sanity check on parameters */ |
723 |
if (cipher == NULL) |
724 |
return BAD_PARAMS; /* must have a cipherInstance to initialize */ |
725 |
if ((mode != MODE_ECB) && (mode != MODE_CBC) && (mode != MODE_CFB1)) |
726 |
return BAD_CIPHER_MODE; /* must have valid cipher mode */ |
727 |
cipher->cipherSig = VALID_SIG; |
728 |
#if ALIGN32 |
729 |
if ((((int)cipher) & 3) || (((int)cipher->IV) & 3) || (((int)cipher->iv32) & 3)) |
730 |
return BAD_ALIGN32; |
731 |
#endif |
732 |
#endif |
733 |
|
734 |
if ((mode != MODE_ECB) && (IV)) /* parse the IV */ |
735 |
{ |
736 |
memcpy (cipher->iv32, IV, BLOCK_SIZE/32); |
737 |
for (i=0;i<BLOCK_SIZE/32;i++) /* make byte-oriented copy for CFB1 */ |
738 |
((DWORD *)cipher->IV)[i] = Bswap(cipher->iv32[i]); |
739 |
} |
740 |
|
741 |
cipher->mode = mode; |
742 |
|
743 |
return TRUE; |
744 |
} |
745 |
|
746 |
/* |
747 |
+***************************************************************************** |
748 |
* |
749 |
* Function Name: blockEncrypt |
750 |
* |
751 |
* Function: Encrypt block(s) of data using Twofish |
752 |
* |
753 |
* Arguments: cipher = ptr to already initialized cipherInstance |
754 |
* key = ptr to already initialized keyInstance |
755 |
* input = ptr to data blocks to be encrypted |
756 |
* inputLen = # bits to encrypt (multiple of blockSize) |
757 |
* outBuffer = ptr to where to put encrypted blocks |
758 |
* |
759 |
* Return: # bits ciphered (>= 0) |
760 |
* else error code (e.g., BAD_CIPHER_STATE, BAD_KEY_MATERIAL) |
761 |
* |
762 |
* Notes: The only supported block size for ECB/CBC modes is BLOCK_SIZE bits. |
763 |
* If inputLen is not a multiple of BLOCK_SIZE bits in those modes, |
764 |
* an error BAD_INPUT_LEN is returned. In CFB1 mode, all block |
765 |
* sizes can be supported. |
766 |
* |
767 |
-****************************************************************************/ |
768 |
static int blockEncrypt(cipherInstance *cipher, keyInstance *key,CONST BYTE *input, |
769 |
int inputLen, BYTE *outBuffer) |
770 |
{ |
771 |
int i,n; /* loop counters */ |
772 |
DWORD x[BLOCK_SIZE/32]; /* block being encrypted */ |
773 |
DWORD t0,t1; /* temp variables */ |
774 |
int rounds=key->numRounds; /* number of rounds */ |
775 |
BYTE bit,bit0,ctBit,carry; /* temps for CFB */ |
776 |
|
777 |
/* make local copies of things for faster access */ |
778 |
int mode = cipher->mode; |
779 |
DWORD sk[TOTAL_SUBKEYS]; |
780 |
DWORD IV[BLOCK_SIZE/32]; |
781 |
|
782 |
GetSboxKey; |
783 |
|
784 |
#if VALIDATE_PARMS |
785 |
if ((cipher == NULL) || (cipher->cipherSig != VALID_SIG)) |
786 |
return BAD_CIPHER_STATE; |
787 |
if ((key == NULL) || (key->keySig != VALID_SIG)) |
788 |
return BAD_KEY_INSTANCE; |
789 |
if ((rounds < 2) || (rounds > MAX_ROUNDS) || (rounds&1)) |
790 |
return BAD_KEY_INSTANCE; |
791 |
if ((mode != MODE_CFB1) && (inputLen % BLOCK_SIZE)) |
792 |
return BAD_INPUT_LEN; |
793 |
#if ALIGN32 |
794 |
if ( (((int)cipher) & 3) || (((int)key ) & 3) || |
795 |
(((int)input ) & 3) || (((int)outBuffer) & 3)) |
796 |
return BAD_ALIGN32; |
797 |
#endif |
798 |
#endif |
799 |
|
800 |
if (mode == MODE_CFB1) |
801 |
{ /* use recursion here to handle CFB, one block at a time */ |
802 |
cipher->mode = MODE_ECB; /* do encryption in ECB */ |
803 |
for (n=0;n<inputLen;n++) |
804 |
{ |
805 |
blockEncrypt(cipher,key,cipher->IV,BLOCK_SIZE,(BYTE *)x); |
806 |
bit0 = 0x80 >> (n & 7);/* which bit position in byte */ |
807 |
ctBit = (input[n/8] & bit0) ^ ((((BYTE *) x)[0] & 0x80) >> (n&7)); |
808 |
outBuffer[n/8] = (outBuffer[n/8] & ~ bit0) | ctBit; |
809 |
carry = ctBit >> (7 - (n&7)); |
810 |
for (i=BLOCK_SIZE/8-1;i>=0;i--) |
811 |
{ |
812 |
bit = cipher->IV[i] >> 7; /* save next "carry" from shift */ |
813 |
cipher->IV[i] = (cipher->IV[i] << 1) ^ carry; |
814 |
carry = bit; |
815 |
} |
816 |
} |
817 |
cipher->mode = MODE_CFB1; /* restore mode for next time */ |
818 |
return inputLen; |
819 |
} |
820 |
|
821 |
/* here for ECB, CBC modes */ |
822 |
if (key->direction != DIR_ENCRYPT) |
823 |
ReverseRoundSubkeys(key,DIR_ENCRYPT); /* reverse the round subkey order */ |
824 |
|
825 |
#ifdef USE_ASM |
826 |
if ((useAsm & 1) && (inputLen)) |
827 |
#ifdef COMPILE_KEY |
828 |
if (key->keySig == VALID_SIG) |
829 |
return ((CipherProc *)(key->encryptFuncPtr))(cipher,key,input,inputLen,outBuffer); |
830 |
#else |
831 |
return (*blockEncrypt_86)(cipher,key,input,inputLen,outBuffer); |
832 |
#endif |
833 |
#endif |
834 |
/* make local copy of subkeys for speed */ |
835 |
memcpy(sk,key->subKeys,sizeof(DWORD)*(ROUND_SUBKEYS+2*rounds)); |
836 |
if (mode == MODE_CBC) |
837 |
BlockCopy(IV,cipher->iv32) |
838 |
else |
839 |
IV[0]=IV[1]=IV[2]=IV[3]=0; |
840 |
|
841 |
for (n=0;n<inputLen;n+=BLOCK_SIZE,input+=BLOCK_SIZE/8,outBuffer+=BLOCK_SIZE/8) |
842 |
{ |
843 |
#define LoadBlockE(N) x[N]=Bswap(((DWORD *)input)[N]) ^ sk[INPUT_WHITEN+N] ^ IV[N] |
844 |
LoadBlockE(0); LoadBlockE(1); LoadBlockE(2); LoadBlockE(3); |
845 |
#define EncryptRound(K,R,id) \ |
846 |
t0 = Fe32##id(x[K ],0); \ |
847 |
t1 = Fe32##id(x[K^1],3); \ |
848 |
x[K^3] = ROL(x[K^3],1); \ |
849 |
x[K^2]^= t0 + t1 + sk[ROUND_SUBKEYS+2*(R) ]; \ |
850 |
x[K^3]^= t0 + 2*t1 + sk[ROUND_SUBKEYS+2*(R)+1]; \ |
851 |
x[K^2] = ROR(x[K^2],1); |
852 |
#define Encrypt2(R,id) { EncryptRound(0,R+1,id); EncryptRound(2,R,id); } |
853 |
|
854 |
#if defined(ZERO_KEY) |
855 |
switch (key->keyLen) |
856 |
{ |
857 |
case 128: |
858 |
for (i=rounds-2;i>=0;i-=2) |
859 |
Encrypt2(i,_128); |
860 |
break; |
861 |
case 192: |
862 |
for (i=rounds-2;i>=0;i-=2) |
863 |
Encrypt2(i,_192); |
864 |
break; |
865 |
case 256: |
866 |
for (i=rounds-2;i>=0;i-=2) |
867 |
Encrypt2(i,_256); |
868 |
break; |
869 |
} |
870 |
#else |
871 |
Encrypt2(14,_); |
872 |
Encrypt2(12,_); |
873 |
Encrypt2(10,_); |
874 |
Encrypt2( 8,_); |
875 |
Encrypt2( 6,_); |
876 |
Encrypt2( 4,_); |
877 |
Encrypt2( 2,_); |
878 |
Encrypt2( 0,_); |
879 |
#endif |
880 |
|
881 |
/* need to do (or undo, depending on your point of view) final swap */ |
882 |
#if LittleEndian |
883 |
#define StoreBlockE(N) ((DWORD *)outBuffer)[N]=x[N^2] ^ sk[OUTPUT_WHITEN+N] |
884 |
#else |
885 |
#define StoreBlockE(N) { t0=x[N^2] ^ sk[OUTPUT_WHITEN+N]; ((DWORD *)outBuffer)[N]=Bswap(t0); } |
886 |
#endif |
887 |
StoreBlockE(0); StoreBlockE(1); StoreBlockE(2); StoreBlockE(3); |
888 |
if (mode == MODE_CBC) |
889 |
{ |
890 |
IV[0]=Bswap(((DWORD *)outBuffer)[0]); |
891 |
IV[1]=Bswap(((DWORD *)outBuffer)[1]); |
892 |
IV[2]=Bswap(((DWORD *)outBuffer)[2]); |
893 |
IV[3]=Bswap(((DWORD *)outBuffer)[3]); |
894 |
} |
895 |
} |
896 |
|
897 |
if (mode == MODE_CBC) |
898 |
BlockCopy(cipher->iv32,IV); |
899 |
|
900 |
return inputLen; |
901 |
} |
902 |
|
903 |
/* |
904 |
+***************************************************************************** |
905 |
* |
906 |
* Function Name: blockDecrypt |
907 |
* |
908 |
* Function: Decrypt block(s) of data using Twofish |
909 |
* |
910 |
* Arguments: cipher = ptr to already initialized cipherInstance |
911 |
* key = ptr to already initialized keyInstance |
912 |
* input = ptr to data blocks to be decrypted |
913 |
* inputLen = # bits to decrypt (multiple of blockSize)
914 |
* outBuffer = ptr to where to put decrypted blocks |
915 |
* |
916 |
* Return: # bits ciphered (>= 0) |
917 |
* else error code (e.g., BAD_CIPHER_STATE, BAD_KEY_INSTANCE)
918 |
* |
919 |
* Notes: The only supported block size for ECB/CBC modes is BLOCK_SIZE bits. |
920 |
* If inputLen is not a multiple of BLOCK_SIZE bits in those modes, |
921 |
* an error BAD_INPUT_LEN is returned. In CFB1 mode, all block |
922 |
* sizes can be supported. |
923 |
* |
924 |
-****************************************************************************/ |
925 |
static int blockDecrypt(cipherInstance *cipher, keyInstance *key,CONST BYTE *input,
						int inputLen, BYTE *outBuffer)
	{
	/*
	 * Decrypt inputLen bits from input into outBuffer.
	 *
	 * Returns inputLen on success.  When VALIDATE_PARMS is enabled, returns
	 * BAD_CIPHER_STATE, BAD_KEY_INSTANCE, BAD_INPUT_LEN or (with ALIGN32)
	 * BAD_ALIGN32 if the corresponding check below fails.  When the USE_ASM
	 * path is taken, the return value is whatever the ASM routine returns.
	 *
	 * Side effects: in CFB1 mode cipher->IV is advanced one bit per input
	 * bit; in CBC mode cipher->iv32 is updated on exit; the round subkeys in
	 * key are reversed first if key->direction is not DIR_DECRYPT.
	 */
	int   i,n;						/* loop counters */
	DWORD x[BLOCK_SIZE/32];			/* block being decrypted */
	DWORD t0,t1;					/* temp variables */
	int	  rounds;					/* number of rounds (loaded after validation) */
	BYTE  bit,bit0,ctBit,carry;		/* temps for CFB */

	/* local copies of things for faster access */
	int	  mode;						/* cipher->mode (loaded after validation) */
	DWORD sk[TOTAL_SUBKEYS];
	DWORD IV[BLOCK_SIZE/32];

	/* NOTE(review): GetSboxKey is a macro defined outside this block.  If it
	 * reads through key, that read still happens before the NULL checks
	 * below -- confirm against its definition. */
	GetSboxKey;

#if VALIDATE_PARMS
	/* validate the pointers BEFORE anything is read through them */
	if ((cipher == NULL) || (cipher->cipherSig != VALID_SIG))
		return BAD_CIPHER_STATE;
	if ((key    == NULL) || (key->keySig       != VALID_SIG))
		return BAD_KEY_INSTANCE;
#endif

	/* safe to dereference now (or validation is compiled out) */
	rounds = key->numRounds;
	mode   = cipher->mode;

#if VALIDATE_PARMS
	if ((rounds < 2) || (rounds > MAX_ROUNDS) || (rounds&1))
		return BAD_KEY_INSTANCE;
	if ((mode != MODE_CFB1) && (inputLen % BLOCK_SIZE))
		return BAD_INPUT_LEN;
#if ALIGN32
	if ( (((int)cipher) & 3) || (((int)key      ) & 3) ||
		 (((int)input)  & 3) || (((int)outBuffer) & 3))
		return BAD_ALIGN32;
#endif
#endif

	if (mode == MODE_CFB1)
		{	/* use blockEncrypt here to handle CFB, one block at a time */
		cipher->mode = MODE_ECB;	/* do encryption in ECB */
		for (n=0;n<inputLen;n++)
			{
			/* encrypt the current IV, then use the top bit of the result */
			blockEncrypt(cipher,key,cipher->IV,BLOCK_SIZE,(BYTE *)x);
			bit0  = 0x80 >> (n & 7);	/* which bit position in byte */
			ctBit = input[n/8] & bit0;	/* ciphertext bit, still in position */
			outBuffer[n/8] = (outBuffer[n/8] & ~ bit0) |
							 (ctBit ^ ((((BYTE *) x)[0] & 0x80) >> (n&7)));
			/* shift IV left one bit, feeding the ciphertext bit in at the end */
			carry = ctBit >> (7 - (n&7));
			for (i=BLOCK_SIZE/8-1;i>=0;i--)
				{
				bit = cipher->IV[i] >> 7;	/* save next "carry" from shift */
				cipher->IV[i] = (cipher->IV[i] << 1) ^ carry;
				carry = bit;
				}
			}
		cipher->mode = MODE_CFB1;	/* restore mode for next time */
		return inputLen;
		}

	/* here for ECB, CBC modes */
	if (key->direction != DIR_DECRYPT)
		ReverseRoundSubkeys(key,DIR_DECRYPT);	/* reverse the round subkey order */
#ifdef USE_ASM
	if ((useAsm & 2) && (inputLen))
	#ifdef COMPILE_KEY
		if (key->keySig == VALID_SIG)
			return ((CipherProc *)(key->decryptFuncPtr))(cipher,key,input,inputLen,outBuffer);
	#else
		return (*blockDecrypt_86)(cipher,key,input,inputLen,outBuffer);
	#endif
#endif
	/* make local copy of subkeys for speed */
	memcpy(sk,key->subKeys,sizeof(DWORD)*(ROUND_SUBKEYS+2*rounds));
	/* BlockCopy is a macro defined elsewhere; it is written without ';' so
	 * that the 'else' below still pairs with this 'if' */
	if (mode == MODE_CBC)
		BlockCopy(IV,cipher->iv32)
	else
		IV[0]=IV[1]=IV[2]=IV[3]=0;

	for (n=0;n<inputLen;n+=BLOCK_SIZE,input+=BLOCK_SIZE/8,outBuffer+=BLOCK_SIZE/8)
		{
		/* load the block (words swapped by N^2) and apply output whitening */
#define LoadBlockD(N) x[N^2]=Bswap(((DWORD *)input)[N]) ^ sk[OUTPUT_WHITEN+N]
		LoadBlockD(0);	LoadBlockD(1);	LoadBlockD(2);	LoadBlockD(3);

#define	DecryptRound(K,R,id)								\
			t0	   = Fe32##id(x[K  ],0);					\
			t1	   = Fe32##id(x[K^1],3);					\
			x[K^2] = ROL (x[K^2],1);						\
			x[K^2]^= t0 +   t1 + sk[ROUND_SUBKEYS+2*(R)  ];	\
			x[K^3]^= t0 + 2*t1 + sk[ROUND_SUBKEYS+2*(R)+1];	\
			x[K^3] = ROR (x[K^3],1);

#define		Decrypt2(R,id)	{ DecryptRound(2,R+1,id); DecryptRound(0,R,id); }

#if defined(ZERO_KEY)
		switch (key->keyLen)
			{
			case 128:
				for (i=rounds-2;i>=0;i-=2)
					Decrypt2(i,_128);
				break;
			case 192:
				for (i=rounds-2;i>=0;i-=2)
					Decrypt2(i,_192);
				break;
			case 256:
				for (i=rounds-2;i>=0;i-=2)
					Decrypt2(i,_256);
				break;
			}
#else
		{	/* fully unrolled: this path always runs rounds 15..0 */
		Decrypt2(14,_);
		Decrypt2(12,_);
		Decrypt2(10,_);
		Decrypt2( 8,_);
		Decrypt2( 6,_);
		Decrypt2( 4,_);
		Decrypt2( 2,_);
		Decrypt2( 0,_);
		}
#endif
		if (mode == MODE_ECB)
			{	/* ECB: undo input whitening and store the plaintext */
#if LittleEndian
#define	StoreBlockD(N)	((DWORD *)outBuffer)[N] = x[N] ^ sk[INPUT_WHITEN+N]
#else
#define	StoreBlockD(N)	{ t0=x[N]^sk[INPUT_WHITEN+N]; ((DWORD *)outBuffer)[N] = Bswap(t0); }
#endif
			StoreBlockD(0);	StoreBlockD(1);	StoreBlockD(2);	StoreBlockD(3);
#undef	StoreBlockD
			continue;
			}
		else
			{	/* chained: xor with the previous IV, then the ciphertext word
				 * just consumed becomes the next IV.  input[N] is read before
				 * outBuffer[N] is written. */
#define	StoreBlockD(N)	x[N]   ^= sk[INPUT_WHITEN+N] ^ IV[N];	\
						IV[N]   = Bswap(((DWORD *)input)[N]);	\
						((DWORD *)outBuffer)[N] = Bswap(x[N]);
			StoreBlockD(0);	StoreBlockD(1);	StoreBlockD(2);	StoreBlockD(3);
#undef	StoreBlockD
			}
		}
	if (mode == MODE_CBC)	/* restore iv32 to cipher */
		BlockCopy(cipher->iv32,IV)

	return inputLen;
	}
1066 |
|
1067 |
#ifdef GetCodeSize |
1068 |
/*
 * Report the size of the Twofish code.
 *
 * When USE_ASM is defined and either of the low two bits of useAsm is set,
 * the value comes from TwofishAsmCodeSize().  Otherwise it is the difference
 * between Here(0), sampled inside this function, and TwofishCodeStart().
 * NOTE(review): Here and TwofishCodeStart are defined outside this block;
 * presumably they yield code-position markers -- confirm.
 */
static DWORD TwofishCodeSize(void)
	{
	DWORD x= Here(0);
#ifdef USE_ASM
	if (useAsm & 3)
		return TwofishAsmCodeSize();
#endif
	return x - TwofishCodeStart();
	}
1077 |
#endif |