1 |
pippijn |
1.2 |
/** |
2 |
pippijn |
1.1 |
* random.C: SIMD oriented Fast Mersenne Twister(SFMT) |
3 |
|
|
* |
4 |
pippijn |
1.3 |
* Copyright © 2007 Pippijn van Steenhoven / The Ermyth Team |
5 |
|
|
* Rights to this code are as documented in COPYING. |
6 |
|
|
* |
7 |
|
|
* |
8 |
|
|
* Portions of this file were derived from sources bearing the following license: |
9 |
|
|
* |
10 |
pippijn |
1.1 |
* Copyright © 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima |
11 |
|
|
* University. All rights reserved. |
12 |
|
|
* |
13 |
|
|
* @author Mutsuo Saito (Hiroshima University) |
14 |
|
|
* @author Makoto Matsumoto (Hiroshima University) |
15 |
|
|
* |
16 |
pippijn |
1.3 |
* The new BSD License is applied to this software. |
17 |
pippijn |
1.1 |
*/ |
18 |
pippijn |
1.2 |
|
19 |
pippijn |
1.4 |
static char const rcsid[] = "$Id: random.C,v 1.3 2007-09-16 18:54:45 pippijn Exp $"; |
20 |
pippijn |
1.2 |
|
21 |
pippijn |
1.1 |
#include <string.h> |
22 |
|
|
#include <assert.h> |
23 |
|
|
|
24 |
|
|
#include <common/random.h> |
25 |
|
|
|
26 |
|
|
/*----------------- |
27 |
|
|
BASIC DEFINITIONS |
28 |
|
|
-----------------*/ |
29 |
|
|
/** SFMT generator has an internal state array of 128-bit integers, |
30 |
|
|
* and N is its size. */ |
31 |
|
|
#define N (216091 / 128 + 1) |
32 |
|
|
/** N32 is the size of internal state array when regarded as an array |
33 |
|
|
* of 32-bit integers.*/ |
34 |
|
|
#define N32 (N * 4) |
35 |
|
|
/** N64 is the size of internal state array when regarded as an array |
36 |
|
|
* of 64-bit integers.*/ |
37 |
|
|
#define N64 (N * 2) |
38 |
|
|
|
39 |
|
|
/*---------------------- |
40 |
|
|
the parameters of SFMT |
41 |
|
|
following definitions are in paramsXXXX.h file. |
42 |
|
|
----------------------*/ |
43 |
|
|
// the pick up position of the array. |
44 |
|
|
#define POS1 627 |
45 |
|
|
// the parameter of shift left as four 32-bit registers. |
46 |
|
|
#define SL1 11 |
47 |
|
|
// the parameter of shift left as one 128-bit register. |
48 |
|
|
// The 128-bit integer is shifted by (SL2 * 8) bits. |
49 |
|
|
#define SL2 3 |
50 |
|
|
// the parameter of shift right as four 32-bit registers. |
51 |
|
|
#define SR1 10 |
52 |
|
|
// the parameter of shift right as one 128-bit register. |
53 |
|
|
// The 128-bit integer is shifted by (SL2 * 8) bits. |
54 |
|
|
#define SR2 1 |
55 |
|
|
// A bitmask, used in the recursion. These parameters are introduced |
56 |
|
|
// to break symmetry of SIMD. |
57 |
|
|
#define MSK1 0xbff7bff7U |
58 |
|
|
#define MSK2 0xbfffffffU |
59 |
|
|
#define MSK3 0xbffffa7fU |
60 |
|
|
#define MSK4 0xffddfbfbU |
61 |
|
|
// These definitions are part of a 128-bit period certification vector. |
62 |
|
|
#define PARITY1 0xf8000001U |
63 |
|
|
#define PARITY2 0x89e80709U |
64 |
|
|
#define PARITY3 0x3bd2b64bU |
65 |
|
|
#define PARITY4 0x0c64b1e4U |
66 |
|
|
|
67 |
|
|
/* PARAMETERS FOR ALTIVEC */ |
68 |
|
|
#if defined(__APPLE__) /* For OSX */ |
69 |
|
|
#define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) |
70 |
|
|
#define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) |
71 |
|
|
#define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) |
72 |
|
|
#define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) |
73 |
|
|
#define ALTI_SL2_PERM (vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10) |
74 |
|
|
#define ALTI_SL2_PERM64 (vector unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2) |
75 |
|
|
#define ALTI_SR2_PERM (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) |
76 |
|
|
#define ALTI_SR2_PERM64 (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) |
77 |
|
|
#else // !__APPLE__ |
78 |
|
|
#define ALTI_SL1 {SL1, SL1, SL1, SL1} |
79 |
|
|
#define ALTI_SR1 {SR1, SR1, SR1, SR1} |
80 |
|
|
#define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} |
81 |
|
|
#define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} |
82 |
|
|
#define ALTI_SL2_PERM { 3,21,21,21, 7, 0, 1, 2,11, 4, 5, 6,15, 8, 9,10} |
83 |
|
|
#define ALTI_SL2_PERM64 { 3, 4, 5, 6, 7,29,29,29,11,12,13,14,15, 0, 1, 2} |
84 |
|
|
#define ALTI_SR2_PERM { 7, 0, 1, 2,11, 4, 5, 6,15, 8, 9,10,17,12,13,14} |
85 |
|
|
#define ALTI_SR2_PERM64 {15, 0, 1, 2, 3, 4, 5, 6,17, 8, 9,10,11,12,13,14} |
86 |
|
|
#endif // __APPLE__ |
87 |
|
|
#define IDSTR "SFMT-216091:627-11-3-10-1:bff7bff7-bfffffff-bffffa7f-ffddfbfb" |
88 |
|
|
|
89 |
|
|
#if defined(__BIG_ENDIAN__) && !defined(__amd64) && !defined(BIG_ENDIAN64) |
90 |
|
|
#define BIG_ENDIAN64 1 |
91 |
|
|
#endif |
92 |
|
|
#if defined(HAVE_ALTIVEC) && !defined(BIG_ENDIAN64) |
93 |
|
|
#define BIG_ENDIAN64 1 |
94 |
|
|
#endif |
95 |
|
|
#if defined(ONLY64) && !defined(BIG_ENDIAN64) |
96 |
|
|
#if defined(__GNUC__) |
97 |
|
|
#error "-DONLY64 must be specified with -DBIG_ENDIAN64" |
98 |
|
|
#endif |
99 |
|
|
#undef ONLY64 |
100 |
|
|
#endif |
101 |
|
|
|
102 |
|
|
/*-------------------------------------- |
103 |
|
|
FILE GLOBAL VARIABLES |
104 |
|
|
internal state, index counter and flag |
105 |
|
|
--------------------------------------*/ |
106 |
|
|
/** the 128-bit internal state array */ |
107 |
|
|
static w128_t sfmt[N]; |
108 |
|
|
/** the 32bit integer pointer to the 128-bit internal state array */ |
109 |
|
|
static uint32_t *psfmt32 = &sfmt[0].u[0]; |
110 |
|
|
#if !defined(BIG_ENDIAN64) || defined(ONLY64) |
111 |
|
|
/** the 64bit integer pointer to the 128-bit internal state array */ |
112 |
|
|
static uint64_t *psfmt64 = reinterpret_cast<uint64_t *> (&sfmt[0].u[0]); |
113 |
|
|
#endif |
114 |
|
|
/** index counter to the 32-bit internal state array */ |
115 |
|
|
static int idx; |
116 |
|
|
/** a flag: it is 0 if and only if the internal state is not yet |
117 |
|
|
* initialized. */ |
118 |
|
|
static int initialized = 0; |
119 |
|
|
/** a parity check vector which certificate the period of 2^{MEXP} */ |
120 |
|
|
static uint32_t parity[4] = { PARITY1, PARITY2, PARITY3, PARITY4 }; |
121 |
|
|
|
122 |
|
|
/*---------------- |
123 |
|
|
STATIC FUNCTIONS |
124 |
|
|
----------------*/ |
125 |
|
|
inline static int idxof (int i); |
126 |
|
|
inline static void rshift128 (w128_t *out, w128_t const *in, int shift); |
127 |
|
|
inline static void lshift128 (w128_t *out, w128_t const *in, int shift); |
128 |
|
|
inline static void gen_rand_all (void); |
129 |
|
|
inline static void gen_rand_array (w128_t *array, int size); |
130 |
|
|
inline static uint32_t func1 (uint32_t x); |
131 |
|
|
inline static uint32_t func2 (uint32_t x); |
132 |
|
|
inline static void period_certification (void); |
133 |
|
|
#if defined(BIG_ENDIAN64) && !defined(ONLY64) |
134 |
|
|
inline static void swap (w128_t *array, int size); |
135 |
|
|
#endif |
136 |
|
|
|
137 |
|
|
#if defined(HAVE_ALTIVEC) |
138 |
|
|
inline static vector unsigned int |
139 |
|
|
vec_recursion (vector unsigned int a, vector unsigned int b, vector unsigned int c, vector unsigned int d) ALWAYSINLINE; |
140 |
|
|
|
141 |
|
|
/** |
142 |
|
|
* This function represents the recursion formula in AltiVec and BIG ENDIAN. |
143 |
|
|
* @param a a 128-bit part of the interal state array |
144 |
|
|
* @param b a 128-bit part of the interal state array |
145 |
|
|
* @param c a 128-bit part of the interal state array |
146 |
|
|
* @param d a 128-bit part of the interal state array |
147 |
|
|
* @return output |
148 |
|
|
*/ |
149 |
|
|
inline static vector unsigned int vec_recursion (vector unsigned int a, vector unsigned int b, vector unsigned int c, vector unsigned int d) |
150 |
|
|
{ |
151 |
|
|
const vector unsigned int sl1 = ALTI_SL1; |
152 |
|
|
const vector unsigned int sr1 = ALTI_SR1; |
153 |
|
|
#ifdef ONLY64 |
154 |
|
|
const vector unsigned int mask = ALTI_MSK64; |
155 |
|
|
const vector unsigned char perm_sl = ALTI_SL2_PERM64; |
156 |
|
|
const vector unsigned char perm_sr = ALTI_SR2_PERM64; |
157 |
|
|
#else |
158 |
|
|
const vector unsigned int mask = ALTI_MSK; |
159 |
|
|
const vector unsigned char perm_sl = ALTI_SL2_PERM; |
160 |
|
|
const vector unsigned char perm_sr = ALTI_SR2_PERM; |
161 |
|
|
#endif |
162 |
|
|
vector unsigned int v, w, x, y, z; |
163 |
|
|
x = vec_perm (a, (vector unsigned int) perm_sl, perm_sl); |
164 |
|
|
v = a; |
165 |
|
|
y = vec_sr (b, sr1); |
166 |
|
|
z = vec_perm (c, (vector unsigned int) perm_sr, perm_sr); |
167 |
|
|
w = vec_sl (d, sl1); |
168 |
|
|
z = vec_xor (z, w); |
169 |
|
|
y = vec_and (y, mask); |
170 |
|
|
v = vec_xor (v, x); |
171 |
|
|
z = vec_xor (z, y); |
172 |
|
|
z = vec_xor (z, v); |
173 |
|
|
return z; |
174 |
|
|
} |
175 |
|
|
|
176 |
|
|
/** |
177 |
|
|
* This function fills the internal state array with pseudorandom |
178 |
|
|
* integers. |
179 |
|
|
*/ |
180 |
|
|
inline static void |
181 |
|
|
gen_rand_all (void) |
182 |
|
|
{ |
183 |
|
|
int i; |
184 |
|
|
vector unsigned int r, r1, r2; |
185 |
|
|
|
186 |
|
|
r1 = sfmt[N - 2].s; |
187 |
|
|
r2 = sfmt[N - 1].s; |
188 |
|
|
for (i = 0; i < N - POS1; i++) |
189 |
|
|
{ |
190 |
|
|
r = vec_recursion (sfmt[i].s, sfmt[i + POS1].s, r1, r2); |
191 |
|
|
sfmt[i].s = r; |
192 |
|
|
r1 = r2; |
193 |
|
|
r2 = r; |
194 |
|
|
} |
195 |
|
|
for (; i < N; i++) |
196 |
|
|
{ |
197 |
|
|
r = vec_recursion (sfmt[i].s, sfmt[i + POS1 - N].s, r1, r2); |
198 |
|
|
sfmt[i].s = r; |
199 |
|
|
r1 = r2; |
200 |
|
|
r2 = r; |
201 |
|
|
} |
202 |
|
|
} |
203 |
|
|
|
204 |
|
|
/** |
205 |
|
|
* This function fills the user-specified array with pseudorandom |
206 |
|
|
* integers. |
207 |
|
|
* |
208 |
|
|
* @param array an 128-bit array to be filled by pseudorandom numbers. |
209 |
|
|
* @param size number of 128-bit pesudorandom numbers to be generated. |
210 |
|
|
*/ |
211 |
|
|
inline static void |
212 |
|
|
gen_rand_array (w128_t *array, int size) |
213 |
|
|
{ |
214 |
|
|
int i, j; |
215 |
|
|
vector unsigned int r, r1, r2; |
216 |
|
|
|
217 |
|
|
r1 = sfmt[N - 2].s; |
218 |
|
|
r2 = sfmt[N - 1].s; |
219 |
|
|
for (i = 0; i < N - POS1; i++) |
220 |
|
|
{ |
221 |
|
|
r = vec_recursion (sfmt[i].s, sfmt[i + POS1].s, r1, r2); |
222 |
|
|
array[i].s = r; |
223 |
|
|
r1 = r2; |
224 |
|
|
r2 = r; |
225 |
|
|
} |
226 |
|
|
for (; i < N; i++) |
227 |
|
|
{ |
228 |
|
|
r = vec_recursion (sfmt[i].s, array[i + POS1 - N].s, r1, r2); |
229 |
|
|
array[i].s = r; |
230 |
|
|
r1 = r2; |
231 |
|
|
r2 = r; |
232 |
|
|
} |
233 |
|
|
/* main loop */ |
234 |
|
|
for (; i < size - N; i++) |
235 |
|
|
{ |
236 |
|
|
r = vec_recursion (array[i - N].s, array[i + POS1 - N].s, r1, r2); |
237 |
|
|
array[i].s = r; |
238 |
|
|
r1 = r2; |
239 |
|
|
r2 = r; |
240 |
|
|
} |
241 |
|
|
for (j = 0; j < 2 * N - size; j++) |
242 |
|
|
{ |
243 |
|
|
sfmt[j].s = array[j + size - N].s; |
244 |
|
|
} |
245 |
|
|
for (; i < size; i++) |
246 |
|
|
{ |
247 |
|
|
r = vec_recursion (array[i - N].s, array[i + POS1 - N].s, r1, r2); |
248 |
|
|
array[i].s = r; |
249 |
|
|
sfmt[j++].s = r; |
250 |
|
|
r1 = r2; |
251 |
|
|
r2 = r; |
252 |
|
|
} |
253 |
|
|
} |
254 |
|
|
|
255 |
|
|
#ifndef ONLY64 |
256 |
|
|
#if defined(__APPLE__) |
257 |
|
|
#define ALTI_SWAP (vector unsigned char) \ |
258 |
|
|
(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11) |
259 |
|
|
#else |
260 |
|
|
#define ALTI_SWAP {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11} |
261 |
|
|
#endif |
262 |
|
|
/** |
263 |
|
|
* This function swaps high and low 32-bit of 64-bit integers in user |
264 |
|
|
* specified array. |
265 |
|
|
* |
266 |
|
|
* @param array an 128-bit array to be swaped. |
267 |
|
|
* @param size size of 128-bit array. |
268 |
|
|
*/ |
269 |
|
|
inline static void |
270 |
|
|
swap (w128_t *array, int size) |
271 |
|
|
{ |
272 |
|
|
int i; |
273 |
|
|
const vector unsigned char perm = ALTI_SWAP; |
274 |
|
|
|
275 |
|
|
for (i = 0; i < size; i++) |
276 |
|
|
{ |
277 |
|
|
array[i].s = vec_perm (array[i].s, (vector unsigned int) perm, perm); |
278 |
|
|
} |
279 |
|
|
} |
280 |
|
|
#endif |
281 |
|
|
#elif defined(HAVE_SSE2) |
282 |
|
|
inline static __m128i |
283 |
|
|
mm_recursion (__m128i *a, __m128i *b, __m128i c, __m128i d, __m128i mask); |
284 |
|
|
|
285 |
|
|
/** |
286 |
|
|
* This function represents the recursion formula. |
287 |
|
|
* @param a a 128-bit part of the interal state array |
288 |
|
|
* @param b a 128-bit part of the interal state array |
289 |
|
|
* @param c a 128-bit part of the interal state array |
290 |
|
|
* @param d a 128-bit part of the interal state array |
291 |
|
|
* @param mask 128-bit mask |
292 |
|
|
* @return output |
293 |
|
|
*/ |
294 |
|
|
inline static __m128i mm_recursion (__m128i *a, __m128i *b, __m128i c, __m128i d, __m128i mask) |
295 |
|
|
{ |
296 |
|
|
__m128i v, x, y, z; |
297 |
|
|
|
298 |
|
|
x = _mm_load_si128 (a); |
299 |
|
|
y = _mm_srli_epi32 (*b, SR1); |
300 |
|
|
z = _mm_srli_si128 (c, SR2); |
301 |
|
|
v = _mm_slli_epi32 (d, SL1); |
302 |
|
|
z = _mm_xor_si128 (z, x); |
303 |
|
|
z = _mm_xor_si128 (z, v); |
304 |
|
|
x = _mm_slli_si128 (x, SL2); |
305 |
|
|
y = _mm_and_si128 (y, mask); |
306 |
|
|
z = _mm_xor_si128 (z, x); |
307 |
|
|
z = _mm_xor_si128 (z, y); |
308 |
|
|
return z; |
309 |
|
|
} |
310 |
|
|
|
311 |
|
|
/** |
312 |
|
|
* This function fills the internal state array with pseudorandom |
313 |
|
|
* integers. |
314 |
|
|
*/ |
315 |
|
|
inline static void |
316 |
|
|
gen_rand_all (void) |
317 |
|
|
{ |
318 |
|
|
int i; |
319 |
|
|
__m128i r, r1, r2, mask; |
320 |
|
|
mask = _mm_set_epi32 (MSK4, MSK3, MSK2, MSK1); |
321 |
|
|
|
322 |
|
|
r1 = _mm_load_si128 (&sfmt[N - 2].si); |
323 |
|
|
r2 = _mm_load_si128 (&sfmt[N - 1].si); |
324 |
|
|
for (i = 0; i < N - POS1; i++) |
325 |
|
|
{ |
326 |
|
|
r = mm_recursion (&sfmt[i].si, &sfmt[i + POS1].si, r1, r2, mask); |
327 |
|
|
_mm_store_si128 (&sfmt[i].si, r); |
328 |
|
|
r1 = r2; |
329 |
|
|
r2 = r; |
330 |
|
|
} |
331 |
|
|
for (; i < N; i++) |
332 |
|
|
{ |
333 |
|
|
r = mm_recursion (&sfmt[i].si, &sfmt[i + POS1 - N].si, r1, r2, mask); |
334 |
|
|
_mm_store_si128 (&sfmt[i].si, r); |
335 |
|
|
r1 = r2; |
336 |
|
|
r2 = r; |
337 |
|
|
} |
338 |
|
|
} |
339 |
|
|
|
340 |
|
|
/** |
341 |
|
|
* This function fills the user-specified array with pseudorandom |
342 |
|
|
* integers. |
343 |
|
|
* |
344 |
|
|
* @param array an 128-bit array to be filled by pseudorandom numbers. |
345 |
|
|
* @param size number of 128-bit pesudorandom numbers to be generated. |
346 |
|
|
*/ |
347 |
|
|
inline static void |
348 |
|
|
gen_rand_array (w128_t *array, int size) |
349 |
|
|
{ |
350 |
|
|
int i, j; |
351 |
|
|
__m128i r, r1, r2, mask; |
352 |
|
|
mask = _mm_set_epi32 (MSK4, MSK3, MSK2, MSK1); |
353 |
|
|
|
354 |
|
|
r1 = _mm_load_si128 (&sfmt[N - 2].si); |
355 |
|
|
r2 = _mm_load_si128 (&sfmt[N - 1].si); |
356 |
|
|
for (i = 0; i < N - POS1; i++) |
357 |
|
|
{ |
358 |
|
|
r = mm_recursion (&sfmt[i].si, &sfmt[i + POS1].si, r1, r2, mask); |
359 |
|
|
_mm_store_si128 (&array[i].si, r); |
360 |
|
|
r1 = r2; |
361 |
|
|
r2 = r; |
362 |
|
|
} |
363 |
|
|
for (; i < N; i++) |
364 |
|
|
{ |
365 |
|
|
r = mm_recursion (&sfmt[i].si, &array[i + POS1 - N].si, r1, r2, mask); |
366 |
|
|
_mm_store_si128 (&array[i].si, r); |
367 |
|
|
r1 = r2; |
368 |
|
|
r2 = r; |
369 |
|
|
} |
370 |
|
|
/* main loop */ |
371 |
|
|
for (; i < size - N; i++) |
372 |
|
|
{ |
373 |
|
|
r = mm_recursion (&array[i - N].si, &array[i + POS1 - N].si, r1, r2, mask); |
374 |
|
|
_mm_store_si128 (&array[i].si, r); |
375 |
|
|
r1 = r2; |
376 |
|
|
r2 = r; |
377 |
|
|
} |
378 |
|
|
for (j = 0; j < 2 * N - size; j++) |
379 |
|
|
{ |
380 |
|
|
r = _mm_load_si128 (&array[j + size - N].si); |
381 |
|
|
_mm_store_si128 (&sfmt[j].si, r); |
382 |
|
|
} |
383 |
|
|
for (; i < size; i++) |
384 |
|
|
{ |
385 |
|
|
r = mm_recursion (&array[i - N].si, &array[i + POS1 - N].si, r1, r2, mask); |
386 |
|
|
_mm_store_si128 (&array[i].si, r); |
387 |
|
|
_mm_store_si128 (&sfmt[j++].si, r); |
388 |
|
|
r1 = r2; |
389 |
|
|
r2 = r; |
390 |
|
|
} |
391 |
|
|
} |
392 |
|
|
#endif |
393 |
|
|
|
394 |
|
|
/** |
395 |
|
|
* This function simulate a 64-bit index of LITTLE ENDIAN |
396 |
|
|
* in BIG ENDIAN machine. |
397 |
|
|
*/ |
398 |
|
|
#ifdef ONLY64 |
399 |
|
|
inline static int |
400 |
|
|
idxof (int i) |
401 |
|
|
{ |
402 |
|
|
return i ^ 1; |
403 |
|
|
} |
404 |
|
|
#else |
405 |
|
|
inline static int |
406 |
|
|
idxof (int i) |
407 |
|
|
{ |
408 |
|
|
return i; |
409 |
|
|
} |
410 |
|
|
#endif |
411 |
|
|
/** |
412 |
|
|
* This function simulates SIMD 128-bit right shift by the standard C. |
413 |
|
|
* The 128-bit integer given in in is shifted by (shift * 8) bits. |
414 |
|
|
* This function simulates the LITTLE ENDIAN SIMD. |
415 |
|
|
* @param out the output of this function |
416 |
|
|
* @param in the 128-bit data to be shifted |
417 |
|
|
* @param shift the shift value |
418 |
|
|
*/ |
419 |
|
|
#ifdef ONLY64 |
420 |
|
|
inline static void |
421 |
|
|
rshift128 (w128_t *out, w128_t const *in, int shift) |
422 |
|
|
{ |
423 |
|
|
uint64_t th, tl, oh, ol; |
424 |
|
|
|
425 |
|
|
th = ((uint64_t) in->u[2] << 32) | ((uint64_t) in->u[3]); |
426 |
|
|
tl = ((uint64_t) in->u[0] << 32) | ((uint64_t) in->u[1]); |
427 |
|
|
|
428 |
|
|
oh = th >> (shift * 8); |
429 |
|
|
ol = tl >> (shift * 8); |
430 |
|
|
ol |= th << (64 - shift * 8); |
431 |
|
|
out->u[0] = (uint32_t) (ol >> 32); |
432 |
|
|
out->u[1] = (uint32_t) ol; |
433 |
|
|
out->u[2] = (uint32_t) (oh >> 32); |
434 |
|
|
out->u[3] = (uint32_t) oh; |
435 |
|
|
} |
436 |
|
|
#else |
437 |
|
|
inline static void |
438 |
|
|
rshift128 (w128_t *out, w128_t const *in, int shift) |
439 |
|
|
{ |
440 |
|
|
uint64_t th, tl, oh, ol; |
441 |
|
|
|
442 |
|
|
th = ((uint64_t) in->u[3] << 32) | ((uint64_t) in->u[2]); |
443 |
|
|
tl = ((uint64_t) in->u[1] << 32) | ((uint64_t) in->u[0]); |
444 |
|
|
|
445 |
|
|
oh = th >> (shift * 8); |
446 |
|
|
ol = tl >> (shift * 8); |
447 |
|
|
ol |= th << (64 - shift * 8); |
448 |
|
|
out->u[1] = (uint32_t) (ol >> 32); |
449 |
|
|
out->u[0] = (uint32_t) ol; |
450 |
|
|
out->u[3] = (uint32_t) (oh >> 32); |
451 |
|
|
out->u[2] = (uint32_t) oh; |
452 |
|
|
} |
453 |
|
|
#endif |
454 |
|
|
/** |
455 |
|
|
* This function simulates SIMD 128-bit left shift by the standard C. |
456 |
|
|
* The 128-bit integer given in in is shifted by (shift * 8) bits. |
457 |
|
|
* This function simulates the LITTLE ENDIAN SIMD. |
458 |
|
|
* @param out the output of this function |
459 |
|
|
* @param in the 128-bit data to be shifted |
460 |
|
|
* @param shift the shift value |
461 |
|
|
*/ |
462 |
|
|
#ifdef ONLY64 |
463 |
|
|
inline static void |
464 |
|
|
lshift128 (w128_t *out, w128_t const *in, int shift) |
465 |
|
|
{ |
466 |
|
|
uint64_t th, tl, oh, ol; |
467 |
|
|
|
468 |
|
|
th = ((uint64_t) in->u[2] << 32) | ((uint64_t) in->u[3]); |
469 |
|
|
tl = ((uint64_t) in->u[0] << 32) | ((uint64_t) in->u[1]); |
470 |
|
|
|
471 |
|
|
oh = th << (shift * 8); |
472 |
|
|
ol = tl << (shift * 8); |
473 |
|
|
oh |= tl >> (64 - shift * 8); |
474 |
|
|
out->u[0] = (uint32_t) (ol >> 32); |
475 |
|
|
out->u[1] = (uint32_t) ol; |
476 |
|
|
out->u[2] = (uint32_t) (oh >> 32); |
477 |
|
|
out->u[3] = (uint32_t) oh; |
478 |
|
|
} |
479 |
|
|
#else |
480 |
|
|
inline static void |
481 |
|
|
lshift128 (w128_t *out, w128_t const *in, int shift) |
482 |
|
|
{ |
483 |
|
|
uint64_t th, tl, oh, ol; |
484 |
|
|
|
485 |
|
|
th = ((uint64_t) in->u[3] << 32) | ((uint64_t) in->u[2]); |
486 |
|
|
tl = ((uint64_t) in->u[1] << 32) | ((uint64_t) in->u[0]); |
487 |
|
|
|
488 |
|
|
oh = th << (shift * 8); |
489 |
|
|
ol = tl << (shift * 8); |
490 |
|
|
oh |= tl >> (64 - shift * 8); |
491 |
|
|
out->u[1] = (uint32_t) (ol >> 32); |
492 |
|
|
out->u[0] = (uint32_t) ol; |
493 |
|
|
out->u[3] = (uint32_t) (oh >> 32); |
494 |
|
|
out->u[2] = (uint32_t) oh; |
495 |
|
|
} |
496 |
|
|
#endif |
497 |
|
|
|
498 |
|
|
/** |
499 |
|
|
* This function represents the recursion formula. |
500 |
|
|
* @param r output |
501 |
|
|
* @param a a 128-bit part of the internal state array |
502 |
|
|
* @param b a 128-bit part of the internal state array |
503 |
|
|
* @param c a 128-bit part of the internal state array |
504 |
|
|
* @param d a 128-bit part of the internal state array |
505 |
|
|
*/ |
506 |
|
|
#ifdef ONLY64 |
507 |
|
|
inline static void |
508 |
|
|
do_recursion (w128_t *r, w128_t *a, w128_t *b, w128_t *c, w128_t *d) |
509 |
|
|
{ |
510 |
|
|
w128_t x; |
511 |
|
|
w128_t y; |
512 |
|
|
|
513 |
|
|
lshift128 (&x, a, SL2); |
514 |
|
|
rshift128 (&y, c, SR2); |
515 |
|
|
r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK2) ^ y.u[0] ^ (d->u[0] << SL1); |
516 |
|
|
r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK1) ^ y.u[1] ^ (d->u[1] << SL1); |
517 |
|
|
r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK4) ^ y.u[2] ^ (d->u[2] << SL1); |
518 |
|
|
r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK3) ^ y.u[3] ^ (d->u[3] << SL1); |
519 |
|
|
} |
520 |
|
|
#else |
521 |
|
|
inline static void |
522 |
|
|
do_recursion (w128_t *r, w128_t *a, w128_t *b, w128_t *c, w128_t *d) |
523 |
|
|
{ |
524 |
|
|
w128_t x; |
525 |
|
|
w128_t y; |
526 |
|
|
|
527 |
|
|
lshift128 (&x, a, SL2); |
528 |
|
|
rshift128 (&y, c, SR2); |
529 |
|
|
r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK1) ^ y.u[0] ^ (d->u[0] << SL1); |
530 |
|
|
r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK2) ^ y.u[1] ^ (d->u[1] << SL1); |
531 |
|
|
r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK3) ^ y.u[2] ^ (d->u[2] << SL1); |
532 |
|
|
r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK4) ^ y.u[3] ^ (d->u[3] << SL1); |
533 |
|
|
} |
534 |
|
|
#endif |
535 |
|
|
|
536 |
|
|
#if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) |
537 |
|
|
/** |
538 |
|
|
* This function fills the internal state array with pseudorandom |
539 |
|
|
* integers. |
540 |
|
|
*/ |
541 |
|
|
inline static void |
542 |
|
|
gen_rand_all (void) |
543 |
|
|
{ |
544 |
|
|
int i; |
545 |
|
|
w128_t *r1, *r2; |
546 |
|
|
|
547 |
|
|
r1 = &sfmt[N - 2]; |
548 |
|
|
r2 = &sfmt[N - 1]; |
549 |
|
|
for (i = 0; i < N - POS1; i++) |
550 |
|
|
{ |
551 |
|
|
do_recursion (&sfmt[i], &sfmt[i], &sfmt[i + POS1], r1, r2); |
552 |
|
|
r1 = r2; |
553 |
|
|
r2 = &sfmt[i]; |
554 |
|
|
} |
555 |
|
|
for (; i < N; i++) |
556 |
|
|
{ |
557 |
|
|
do_recursion (&sfmt[i], &sfmt[i], &sfmt[i + POS1 - N], r1, r2); |
558 |
|
|
r1 = r2; |
559 |
|
|
r2 = &sfmt[i]; |
560 |
|
|
} |
561 |
|
|
} |
562 |
|
|
|
563 |
|
|
/** |
564 |
|
|
* This function fills the user-specified array with pseudorandom |
565 |
|
|
* integers. |
566 |
|
|
* |
567 |
|
|
* @param array an 128-bit array to be filled by pseudorandom numbers. |
568 |
|
|
* @param size number of 128-bit pseudorandom numbers to be generated. |
569 |
|
|
*/ |
570 |
|
|
inline static void |
571 |
|
|
gen_rand_array (w128_t *array, int size) |
572 |
|
|
{ |
573 |
|
|
int i, j; |
574 |
|
|
w128_t *r1, *r2; |
575 |
|
|
|
576 |
|
|
r1 = &sfmt[N - 2]; |
577 |
|
|
r2 = &sfmt[N - 1]; |
578 |
|
|
for (i = 0; i < N - POS1; i++) |
579 |
|
|
{ |
580 |
|
|
do_recursion (&array[i], &sfmt[i], &sfmt[i + POS1], r1, r2); |
581 |
|
|
r1 = r2; |
582 |
|
|
r2 = &array[i]; |
583 |
|
|
} |
584 |
|
|
for (; i < N; i++) |
585 |
|
|
{ |
586 |
|
|
do_recursion (&array[i], &sfmt[i], &array[i + POS1 - N], r1, r2); |
587 |
|
|
r1 = r2; |
588 |
|
|
r2 = &array[i]; |
589 |
|
|
} |
590 |
|
|
for (; i < size - N; i++) |
591 |
|
|
{ |
592 |
|
|
do_recursion (&array[i], &array[i - N], &array[i + POS1 - N], r1, r2); |
593 |
|
|
r1 = r2; |
594 |
|
|
r2 = &array[i]; |
595 |
|
|
} |
596 |
|
|
for (j = 0; j < 2 * N - size; j++) |
597 |
|
|
{ |
598 |
|
|
sfmt[j] = array[j + size - N]; |
599 |
|
|
} |
600 |
|
|
for (; i < size; i++, j++) |
601 |
|
|
{ |
602 |
|
|
do_recursion (&array[i], &array[i - N], &array[i + POS1 - N], r1, r2); |
603 |
|
|
r1 = r2; |
604 |
|
|
r2 = &array[i]; |
605 |
|
|
sfmt[j] = array[i]; |
606 |
|
|
} |
607 |
|
|
} |
608 |
|
|
#endif |
609 |
|
|
|
610 |
|
|
#if defined(BIG_ENDIAN64) && !defined(ONLY64) && !defined(HAVE_ALTIVEC) |
611 |
|
|
inline static void |
612 |
|
|
swap (w128_t *array, int size) |
613 |
|
|
{ |
614 |
|
|
int i; |
615 |
|
|
uint32_t x, y; |
616 |
|
|
|
617 |
|
|
for (i = 0; i < size; i++) |
618 |
|
|
{ |
619 |
|
|
x = array[i].u[0]; |
620 |
|
|
y = array[i].u[2]; |
621 |
|
|
array[i].u[0] = array[i].u[1]; |
622 |
|
|
array[i].u[2] = array[i].u[3]; |
623 |
|
|
array[i].u[1] = x; |
624 |
|
|
array[i].u[3] = y; |
625 |
|
|
} |
626 |
|
|
} |
627 |
|
|
#endif |
628 |
|
|
/** |
629 |
|
|
* This function represents a function used in the initialization |
630 |
|
|
* by init_by_array |
631 |
|
|
* @param x 32-bit integer |
632 |
|
|
* @return 32-bit integer |
633 |
|
|
*/ |
634 |
|
|
inline static uint32_t |
635 |
|
|
func1 (uint32_t x) |
636 |
|
|
{ |
637 |
|
|
return (x ^ (x >> 27)) * (uint32_t) 1664525UL; |
638 |
|
|
} |
639 |
|
|
|
640 |
|
|
/** |
641 |
|
|
* This function represents a function used in the initialization |
642 |
|
|
* by init_by_array |
643 |
|
|
* @param x 32-bit integer |
644 |
|
|
* @return 32-bit integer |
645 |
|
|
*/ |
646 |
|
|
inline static uint32_t |
647 |
|
|
func2 (uint32_t x) |
648 |
|
|
{ |
649 |
|
|
return (x ^ (x >> 27)) * (uint32_t) 1566083941UL; |
650 |
|
|
} |
651 |
|
|
|
652 |
|
|
/** |
653 |
|
|
* This function certificate the period of 2^{MEXP} |
654 |
|
|
*/ |
655 |
|
|
static void |
656 |
|
|
period_certification (void) |
657 |
|
|
{ |
658 |
|
|
int inner = 0; |
659 |
|
|
int i, j; |
660 |
|
|
uint32_t work; |
661 |
|
|
|
662 |
|
|
for (i = 0; i < 4; i++) |
663 |
|
|
inner ^= psfmt32[idxof (i)] & parity[i]; |
664 |
|
|
for (i = 16; i > 0; i >>= 1) |
665 |
|
|
inner ^= inner >> i; |
666 |
|
|
inner &= 1; |
667 |
|
|
/* check OK */ |
668 |
|
|
if (inner == 1) |
669 |
|
|
{ |
670 |
|
|
return; |
671 |
|
|
} |
672 |
|
|
/* check NG, and modification */ |
673 |
|
|
for (i = 0; i < 4; i++) |
674 |
|
|
{ |
675 |
|
|
work = 1; |
676 |
|
|
for (j = 0; j < 32; j++) |
677 |
|
|
{ |
678 |
|
|
if ((work & parity[i]) != 0) |
679 |
|
|
{ |
680 |
|
|
psfmt32[idxof (i)] ^= work; |
681 |
|
|
return; |
682 |
|
|
} |
683 |
|
|
work = work << 1; |
684 |
|
|
} |
685 |
|
|
} |
686 |
|
|
} |
687 |
|
|
|
688 |
|
|
/*---------------- |
689 |
|
|
PUBLIC FUNCTIONS |
690 |
|
|
----------------*/ |
691 |
|
|
/** |
692 |
|
|
* This function returns the identification string. |
693 |
|
|
* The string shows the word size, the Mersenne exponent, |
694 |
|
|
* and all parameters of this generator. |
695 |
|
|
*/ |
696 |
|
|
char const * const |
697 |
|
|
get_idstring (void) |
698 |
|
|
{ |
699 |
|
|
return IDSTR; |
700 |
|
|
} |
701 |
|
|
|
702 |
|
|
/** |
703 |
|
|
* This function returns the minimum size of array used for \b |
704 |
|
|
* fill_array32() function. |
705 |
|
|
* @return minimum size of array used for fill_array32() function. |
706 |
|
|
*/ |
707 |
|
|
int |
708 |
|
|
get_min_array_size32 (void) |
709 |
|
|
{ |
710 |
|
|
return N32; |
711 |
|
|
} |
712 |
|
|
|
713 |
|
|
/** |
714 |
|
|
* This function returns the minimum size of array used for \b |
715 |
|
|
* fill_array64() function. |
716 |
|
|
* @return minimum size of array used for fill_array64() function. |
717 |
|
|
*/ |
718 |
|
|
int |
719 |
|
|
get_min_array_size64 (void) |
720 |
|
|
{ |
721 |
|
|
return N64; |
722 |
|
|
} |
723 |
|
|
|
724 |
|
|
#ifndef ONLY64 |
725 |
|
|
/** |
726 |
|
|
* This function generates and returns 32-bit pseudorandom number. |
727 |
|
|
* init_gen_rand or init_by_array must be called before this function. |
728 |
|
|
* @return 32-bit pseudorandom number |
729 |
|
|
*/ |
730 |
|
|
uint32_t |
731 |
|
|
gen_rand32 (void) |
732 |
|
|
{ |
733 |
|
|
uint32_t r; |
734 |
|
|
|
735 |
|
|
assert (initialized); |
736 |
|
|
if (idx >= N32) |
737 |
|
|
{ |
738 |
|
|
gen_rand_all (); |
739 |
|
|
idx = 0; |
740 |
|
|
} |
741 |
|
|
r = psfmt32[idx++]; |
742 |
|
|
return r; |
743 |
|
|
} |
744 |
|
|
#endif |
745 |
|
|
/** |
746 |
|
|
* This function generates and returns 64-bit pseudorandom number. |
747 |
|
|
* init_gen_rand or init_by_array must be called before this function. |
748 |
|
|
* The function gen_rand64 should not be called after gen_rand32, |
749 |
|
|
* unless an initialization is again executed. |
750 |
|
|
* @return 64-bit pseudorandom number |
751 |
|
|
*/ |
752 |
|
|
uint64_t |
753 |
|
|
gen_rand64 (void) |
754 |
|
|
{ |
755 |
|
|
#if defined(BIG_ENDIAN64) && !defined(ONLY64) |
756 |
|
|
uint32_t r1, r2; |
757 |
|
|
#else |
758 |
|
|
uint64_t r; |
759 |
|
|
#endif |
760 |
|
|
|
761 |
|
|
assert (initialized); |
762 |
|
|
assert (idx % 2 == 0); |
763 |
|
|
|
764 |
|
|
if (idx >= N32) |
765 |
|
|
{ |
766 |
|
|
gen_rand_all (); |
767 |
|
|
idx = 0; |
768 |
|
|
} |
769 |
|
|
#if defined(BIG_ENDIAN64) && !defined(ONLY64) |
770 |
|
|
r1 = psfmt32[idx]; |
771 |
|
|
r2 = psfmt32[idx + 1]; |
772 |
|
|
idx += 2; |
773 |
|
|
return ((uint64_t) r2 << 32) | r1; |
774 |
|
|
#else |
775 |
|
|
r = psfmt64[idx / 2]; |
776 |
|
|
idx += 2; |
777 |
|
|
return r; |
778 |
|
|
#endif |
779 |
|
|
} |
780 |
|
|
|
781 |
|
|
#ifndef ONLY64 |
782 |
|
|
/** |
783 |
|
|
* This function generates pseudorandom 32-bit integers in the |
784 |
|
|
* specified array[] by one call. The number of pseudorandom integers |
785 |
|
|
* is specified by the argument size, which must be at least 624 and a |
786 |
|
|
* multiple of four. The generation by this function is much faster |
787 |
|
|
* than the following gen_rand function. |
788 |
|
|
* |
789 |
|
|
* For initialization, init_gen_rand or init_by_array must be called |
790 |
|
|
* before the first call of this function. This function can not be |
791 |
|
|
* used after calling gen_rand function, without initialization. |
792 |
|
|
* |
793 |
|
|
* @param array an array where pseudorandom 32-bit integers are filled |
794 |
|
|
* by this function. The pointer to the array must be \b "aligned" |
795 |
|
|
* (namely, must be a multiple of 16) in the SIMD version, since it |
796 |
|
|
* refers to the address of a 128-bit integer. In the standard C |
797 |
|
|
* version, the pointer is arbitrary. |
798 |
|
|
* |
799 |
|
|
* @param size the number of 32-bit pseudorandom integers to be |
800 |
|
|
* generated. size must be a multiple of 4, and greater than or equal |
801 |
|
|
* to (MEXP / 128 + 1) * 4. |
802 |
|
|
* |
803 |
|
|
* @note \b memalign or \b posix_memalign is available to get aligned |
804 |
|
|
* memory. Mac OSX doesn't have these functions, but \b malloc of OSX |
805 |
|
|
* returns the pointer to the aligned memory block. |
806 |
|
|
*/ |
807 |
|
|
void |
808 |
|
|
fill_array32 (uint32_t * array, int size) |
809 |
|
|
{ |
810 |
|
|
assert (initialized); |
811 |
|
|
assert (idx == N32); |
812 |
|
|
assert (size % 4 == 0); |
813 |
|
|
assert (size >= N32); |
814 |
|
|
|
815 |
|
|
gen_rand_array (reinterpret_cast<w128_t *> (array), size / 4); |
816 |
|
|
idx = N32; |
817 |
|
|
} |
818 |
|
|
#endif |
819 |
|
|
|
820 |
|
|
/** |
821 |
|
|
* This function generates pseudorandom 64-bit integers in the |
822 |
|
|
* specified array[] by one call. The number of pseudorandom integers |
823 |
|
|
* is specified by the argument size, which must be at least 312 and a |
824 |
|
|
* multiple of two. The generation by this function is much faster |
825 |
|
|
* than the following gen_rand function. |
826 |
|
|
* |
827 |
|
|
* For initialization, init_gen_rand or init_by_array must be called |
828 |
|
|
* before the first call of this function. This function can not be |
829 |
|
|
* used after calling gen_rand function, without initialization. |
830 |
|
|
* |
831 |
|
|
* @param array an array where pseudorandom 64-bit integers are filled |
832 |
|
|
* by this function. The pointer to the array must be "aligned" |
833 |
|
|
* (namely, must be a multiple of 16) in the SIMD version, since it |
834 |
|
|
* refers to the address of a 128-bit integer. In the standard C |
835 |
|
|
* version, the pointer is arbitrary. |
836 |
|
|
* |
837 |
|
|
* @param size the number of 64-bit pseudorandom integers to be |
838 |
|
|
* generated. size must be a multiple of 2, and greater than or equal |
839 |
|
|
* to (MEXP / 128 + 1) * 2 |
840 |
|
|
* |
841 |
|
|
* @note \b memalign or \b posix_memalign is available to get aligned |
842 |
|
|
* memory. Mac OSX doesn't have these functions, but \b malloc of OSX |
843 |
|
|
* returns the pointer to the aligned memory block. |
844 |
|
|
*/ |
845 |
|
|
void |
846 |
|
|
fill_array64 (uint64_t * array, int size) |
847 |
|
|
{ |
848 |
|
|
assert (initialized); |
849 |
|
|
assert (idx == N32); |
850 |
|
|
assert (size % 2 == 0); |
851 |
|
|
assert (size >= N64); |
852 |
|
|
|
853 |
|
|
gen_rand_array (reinterpret_cast<w128_t *> (array), size / 2); |
854 |
|
|
idx = N32; |
855 |
|
|
|
856 |
|
|
#if defined(BIG_ENDIAN64) && !defined(ONLY64) |
857 |
|
|
swap (reinterpret_cast<w128_t *> (array), size / 2); |
858 |
|
|
#endif |
859 |
|
|
} |
860 |
|
|
|
861 |
|
|
/** |
862 |
|
|
* This function initializes the internal state array with a 32-bit |
863 |
|
|
* integer seed. |
864 |
|
|
* |
865 |
|
|
* @param seed a 32-bit integer used as the seed. |
866 |
|
|
*/ |
867 |
|
|
void |
868 |
|
|
init_gen_rand (uint32_t seed) |
869 |
|
|
{ |
870 |
|
|
int i; |
871 |
|
|
|
872 |
|
|
psfmt32[idxof (0)] = seed; |
873 |
|
|
for (i = 1; i < N32; i++) |
874 |
|
|
{ |
875 |
|
|
psfmt32[idxof (i)] = 1812433253UL * (psfmt32[idxof (i - 1)] ^ (psfmt32[idxof (i - 1)] >> 30)) + i; |
876 |
|
|
} |
877 |
|
|
idx = N32; |
878 |
|
|
period_certification (); |
879 |
|
|
initialized = 1; |
880 |
|
|
} |
881 |
|
|
|
882 |
|
|
/** |
883 |
|
|
* This function initializes the internal state array, |
884 |
|
|
* with an array of 32-bit integers used as the seeds |
885 |
|
|
* @param init_key the array of 32-bit integers, used as a seed. |
886 |
|
|
* @param key_length the length of init_key. |
887 |
|
|
*/ |
888 |
|
|
void |
889 |
|
|
init_by_array (uint32_t * init_key, int key_length) |
890 |
|
|
{ |
891 |
|
|
int i, j, count; |
892 |
|
|
uint32_t r; |
893 |
|
|
int lag; |
894 |
|
|
int mid; |
895 |
|
|
int size = N * 4; |
896 |
|
|
|
897 |
|
|
if (size >= 623) |
898 |
|
|
{ |
899 |
|
|
lag = 11; |
900 |
|
|
} |
901 |
|
|
else if (size >= 68) |
902 |
|
|
{ |
903 |
|
|
lag = 7; |
904 |
|
|
} |
905 |
|
|
else if (size >= 39) |
906 |
|
|
{ |
907 |
|
|
lag = 5; |
908 |
|
|
} |
909 |
|
|
else |
910 |
|
|
{ |
911 |
|
|
lag = 3; |
912 |
|
|
} |
913 |
|
|
mid = (size - lag) / 2; |
914 |
|
|
|
915 |
|
|
memset (sfmt, 0x8b, sizeof (sfmt)); |
916 |
|
|
if (key_length + 1 > N32) |
917 |
|
|
{ |
918 |
|
|
count = key_length + 1; |
919 |
|
|
} |
920 |
|
|
else |
921 |
|
|
{ |
922 |
|
|
count = N32; |
923 |
|
|
} |
924 |
|
|
r = func1 (psfmt32[idxof (0)] ^ psfmt32[idxof (mid)] ^ psfmt32[idxof (N32 - 1)]); |
925 |
|
|
psfmt32[idxof (mid)] += r; |
926 |
|
|
r += key_length; |
927 |
|
|
psfmt32[idxof (mid + lag)] += r; |
928 |
|
|
psfmt32[idxof (0)] = r; |
929 |
|
|
|
930 |
|
|
count--; |
931 |
|
|
for (i = 1, j = 0; (j < count) && (j < key_length); j++) |
932 |
|
|
{ |
933 |
|
|
r = func1 (psfmt32[idxof (i)] ^ psfmt32[idxof ((i + mid) % N32)] ^ psfmt32[idxof ((i + N32 - 1) % N32)]); |
934 |
|
|
psfmt32[idxof ((i + mid) % N32)] += r; |
935 |
|
|
r += init_key[j] + i; |
936 |
|
|
psfmt32[idxof ((i + mid + lag) % N32)] += r; |
937 |
|
|
psfmt32[idxof (i)] = r; |
938 |
|
|
i = (i + 1) % N32; |
939 |
|
|
} |
940 |
|
|
for (; j < count; j++) |
941 |
|
|
{ |
942 |
|
|
r = func1 (psfmt32[idxof (i)] ^ psfmt32[idxof ((i + mid) % N32)] ^ psfmt32[idxof ((i + N32 - 1) % N32)]); |
943 |
|
|
psfmt32[idxof ((i + mid) % N32)] += r; |
944 |
|
|
r += i; |
945 |
|
|
psfmt32[idxof ((i + mid + lag) % N32)] += r; |
946 |
|
|
psfmt32[idxof (i)] = r; |
947 |
|
|
i = (i + 1) % N32; |
948 |
|
|
} |
949 |
|
|
for (j = 0; j < N32; j++) |
950 |
|
|
{ |
951 |
|
|
r = func2 (psfmt32[idxof (i)] + psfmt32[idxof ((i + mid) % N32)] + psfmt32[idxof ((i + N32 - 1) % N32)]); |
952 |
|
|
psfmt32[idxof ((i + mid) % N32)] ^= r; |
953 |
|
|
r -= i; |
954 |
|
|
psfmt32[idxof ((i + mid + lag) % N32)] ^= r; |
955 |
|
|
psfmt32[idxof (i)] = r; |
956 |
|
|
i = (i + 1) % N32; |
957 |
|
|
} |
958 |
|
|
|
959 |
|
|
idx = N32; |
960 |
|
|
period_certification (); |
961 |
|
|
initialized = 1; |
962 |
|
|
} |