ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/ermyth/src/random.C
Revision: 1.4
Committed: Sat Sep 22 14:27:30 2007 UTC (16 years, 7 months ago) by pippijn
Content type: text/plain
Branch: MAIN
CVS Tags: HEAD
Changes since 1.3: +1 -1 lines
State: FILE REMOVED
Log Message:
split up ermyth into ermyth-modules, libermyth (currently just ermyth-util) and ermyth-core

File Contents

# Content
1 /**
2 * random.C: SIMD oriented Fast Mersenne Twister(SFMT)
3 *
4 * Copyright © 2007 Pippijn van Steenhoven / The Ermyth Team
5 * Rights to this code are as documented in COPYING.
6 *
7 *
8 * Portions of this file were derived from sources bearing the following license:
9 *
10 * Copyright © 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima
11 * University. All rights reserved.
12 *
13 * @author Mutsuo Saito (Hiroshima University)
14 * @author Makoto Matsumoto (Hiroshima University)
15 *
16 * The new BSD License is applied to this software.
17 */
18
19 static char const rcsid[] = "$Id: random.C,v 1.3 2007-09-16 18:54:45 pippijn Exp $";
20
21 #include <string.h>
22 #include <assert.h>
23
24 #include <common/random.h>
25
26 /*-----------------
27 BASIC DEFINITIONS
28 -----------------*/
29 /** SFMT generator has an internal state array of 128-bit integers,
30 * and N is its size. */
31 #define N (216091 / 128 + 1)
32 /** N32 is the size of internal state array when regarded as an array
33 * of 32-bit integers.*/
34 #define N32 (N * 4)
35 /** N64 is the size of internal state array when regarded as an array
36 * of 64-bit integers.*/
37 #define N64 (N * 2)
38
39 /*----------------------
40 the parameters of SFMT
41 following definitions are in paramsXXXX.h file.
42 ----------------------*/
43 // the pick up position of the array.
44 #define POS1 627
45 // the parameter of shift left as four 32-bit registers.
46 #define SL1 11
47 // the parameter of shift left as one 128-bit register.
48 // The 128-bit integer is shifted by (SL2 * 8) bits.
49 #define SL2 3
50 // the parameter of shift right as four 32-bit registers.
51 #define SR1 10
52 // the parameter of shift right as one 128-bit register.
53 // The 128-bit integer is shifted by (SL2 * 8) bits.
54 #define SR2 1
55 // A bitmask, used in the recursion. These parameters are introduced
56 // to break symmetry of SIMD.
57 #define MSK1 0xbff7bff7U
58 #define MSK2 0xbfffffffU
59 #define MSK3 0xbffffa7fU
60 #define MSK4 0xffddfbfbU
61 // These definitions are part of a 128-bit period certification vector.
62 #define PARITY1 0xf8000001U
63 #define PARITY2 0x89e80709U
64 #define PARITY3 0x3bd2b64bU
65 #define PARITY4 0x0c64b1e4U
66
67 /* PARAMETERS FOR ALTIVEC */
68 #if defined(__APPLE__) /* For OSX */
69 #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1)
70 #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1)
71 #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
72 #define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
73 #define ALTI_SL2_PERM (vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10)
74 #define ALTI_SL2_PERM64 (vector unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2)
75 #define ALTI_SR2_PERM (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14)
76 #define ALTI_SR2_PERM64 (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14)
77 #else // !__APPLE__
78 #define ALTI_SL1 {SL1, SL1, SL1, SL1}
79 #define ALTI_SR1 {SR1, SR1, SR1, SR1}
80 #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4}
81 #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3}
82 #define ALTI_SL2_PERM { 3,21,21,21, 7, 0, 1, 2,11, 4, 5, 6,15, 8, 9,10}
83 #define ALTI_SL2_PERM64 { 3, 4, 5, 6, 7,29,29,29,11,12,13,14,15, 0, 1, 2}
84 #define ALTI_SR2_PERM { 7, 0, 1, 2,11, 4, 5, 6,15, 8, 9,10,17,12,13,14}
85 #define ALTI_SR2_PERM64 {15, 0, 1, 2, 3, 4, 5, 6,17, 8, 9,10,11,12,13,14}
86 #endif // __APPLE__
87 #define IDSTR "SFMT-216091:627-11-3-10-1:bff7bff7-bfffffff-bffffa7f-ffddfbfb"
88
89 #if defined(__BIG_ENDIAN__) && !defined(__amd64) && !defined(BIG_ENDIAN64)
90 #define BIG_ENDIAN64 1
91 #endif
92 #if defined(HAVE_ALTIVEC) && !defined(BIG_ENDIAN64)
93 #define BIG_ENDIAN64 1
94 #endif
95 #if defined(ONLY64) && !defined(BIG_ENDIAN64)
96 #if defined(__GNUC__)
97 #error "-DONLY64 must be specified with -DBIG_ENDIAN64"
98 #endif
99 #undef ONLY64
100 #endif
101
102 /*--------------------------------------
103 FILE GLOBAL VARIABLES
104 internal state, index counter and flag
105 --------------------------------------*/
106 /** the 128-bit internal state array */
107 static w128_t sfmt[N];
108 /** the 32bit integer pointer to the 128-bit internal state array */
109 static uint32_t *psfmt32 = &sfmt[0].u[0];
110 #if !defined(BIG_ENDIAN64) || defined(ONLY64)
111 /** the 64bit integer pointer to the 128-bit internal state array */
112 static uint64_t *psfmt64 = reinterpret_cast<uint64_t *> (&sfmt[0].u[0]);
113 #endif
114 /** index counter to the 32-bit internal state array */
115 static int idx;
116 /** a flag: it is 0 if and only if the internal state is not yet
117 * initialized. */
118 static int initialized = 0;
119 /** a parity check vector which certificate the period of 2^{MEXP} */
120 static uint32_t parity[4] = { PARITY1, PARITY2, PARITY3, PARITY4 };
121
122 /*----------------
123 STATIC FUNCTIONS
124 ----------------*/
125 inline static int idxof (int i);
126 inline static void rshift128 (w128_t *out, w128_t const *in, int shift);
127 inline static void lshift128 (w128_t *out, w128_t const *in, int shift);
128 inline static void gen_rand_all (void);
129 inline static void gen_rand_array (w128_t *array, int size);
130 inline static uint32_t func1 (uint32_t x);
131 inline static uint32_t func2 (uint32_t x);
132 inline static void period_certification (void);
133 #if defined(BIG_ENDIAN64) && !defined(ONLY64)
134 inline static void swap (w128_t *array, int size);
135 #endif
136
137 #if defined(HAVE_ALTIVEC)
138 inline static vector unsigned int
139 vec_recursion (vector unsigned int a, vector unsigned int b, vector unsigned int c, vector unsigned int d) ALWAYSINLINE;
140
141 /**
142 * This function represents the recursion formula in AltiVec and BIG ENDIAN.
143 * @param a a 128-bit part of the interal state array
144 * @param b a 128-bit part of the interal state array
145 * @param c a 128-bit part of the interal state array
146 * @param d a 128-bit part of the interal state array
147 * @return output
148 */
149 inline static vector unsigned int vec_recursion (vector unsigned int a, vector unsigned int b, vector unsigned int c, vector unsigned int d)
150 {
151 const vector unsigned int sl1 = ALTI_SL1;
152 const vector unsigned int sr1 = ALTI_SR1;
153 #ifdef ONLY64
154 const vector unsigned int mask = ALTI_MSK64;
155 const vector unsigned char perm_sl = ALTI_SL2_PERM64;
156 const vector unsigned char perm_sr = ALTI_SR2_PERM64;
157 #else
158 const vector unsigned int mask = ALTI_MSK;
159 const vector unsigned char perm_sl = ALTI_SL2_PERM;
160 const vector unsigned char perm_sr = ALTI_SR2_PERM;
161 #endif
162 vector unsigned int v, w, x, y, z;
163 x = vec_perm (a, (vector unsigned int) perm_sl, perm_sl);
164 v = a;
165 y = vec_sr (b, sr1);
166 z = vec_perm (c, (vector unsigned int) perm_sr, perm_sr);
167 w = vec_sl (d, sl1);
168 z = vec_xor (z, w);
169 y = vec_and (y, mask);
170 v = vec_xor (v, x);
171 z = vec_xor (z, y);
172 z = vec_xor (z, v);
173 return z;
174 }
175
176 /**
177 * This function fills the internal state array with pseudorandom
178 * integers.
179 */
180 inline static void
181 gen_rand_all (void)
182 {
183 int i;
184 vector unsigned int r, r1, r2;
185
186 r1 = sfmt[N - 2].s;
187 r2 = sfmt[N - 1].s;
188 for (i = 0; i < N - POS1; i++)
189 {
190 r = vec_recursion (sfmt[i].s, sfmt[i + POS1].s, r1, r2);
191 sfmt[i].s = r;
192 r1 = r2;
193 r2 = r;
194 }
195 for (; i < N; i++)
196 {
197 r = vec_recursion (sfmt[i].s, sfmt[i + POS1 - N].s, r1, r2);
198 sfmt[i].s = r;
199 r1 = r2;
200 r2 = r;
201 }
202 }
203
204 /**
205 * This function fills the user-specified array with pseudorandom
206 * integers.
207 *
208 * @param array an 128-bit array to be filled by pseudorandom numbers.
209 * @param size number of 128-bit pesudorandom numbers to be generated.
210 */
211 inline static void
212 gen_rand_array (w128_t *array, int size)
213 {
214 int i, j;
215 vector unsigned int r, r1, r2;
216
217 r1 = sfmt[N - 2].s;
218 r2 = sfmt[N - 1].s;
219 for (i = 0; i < N - POS1; i++)
220 {
221 r = vec_recursion (sfmt[i].s, sfmt[i + POS1].s, r1, r2);
222 array[i].s = r;
223 r1 = r2;
224 r2 = r;
225 }
226 for (; i < N; i++)
227 {
228 r = vec_recursion (sfmt[i].s, array[i + POS1 - N].s, r1, r2);
229 array[i].s = r;
230 r1 = r2;
231 r2 = r;
232 }
233 /* main loop */
234 for (; i < size - N; i++)
235 {
236 r = vec_recursion (array[i - N].s, array[i + POS1 - N].s, r1, r2);
237 array[i].s = r;
238 r1 = r2;
239 r2 = r;
240 }
241 for (j = 0; j < 2 * N - size; j++)
242 {
243 sfmt[j].s = array[j + size - N].s;
244 }
245 for (; i < size; i++)
246 {
247 r = vec_recursion (array[i - N].s, array[i + POS1 - N].s, r1, r2);
248 array[i].s = r;
249 sfmt[j++].s = r;
250 r1 = r2;
251 r2 = r;
252 }
253 }
254
255 #ifndef ONLY64
256 #if defined(__APPLE__)
257 #define ALTI_SWAP (vector unsigned char) \
258 (4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11)
259 #else
260 #define ALTI_SWAP {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11}
261 #endif
262 /**
263 * This function swaps high and low 32-bit of 64-bit integers in user
264 * specified array.
265 *
266 * @param array an 128-bit array to be swaped.
267 * @param size size of 128-bit array.
268 */
269 inline static void
270 swap (w128_t *array, int size)
271 {
272 int i;
273 const vector unsigned char perm = ALTI_SWAP;
274
275 for (i = 0; i < size; i++)
276 {
277 array[i].s = vec_perm (array[i].s, (vector unsigned int) perm, perm);
278 }
279 }
280 #endif
281 #elif defined(HAVE_SSE2)
282 inline static __m128i
283 mm_recursion (__m128i *a, __m128i *b, __m128i c, __m128i d, __m128i mask);
284
285 /**
286 * This function represents the recursion formula.
287 * @param a a 128-bit part of the interal state array
288 * @param b a 128-bit part of the interal state array
289 * @param c a 128-bit part of the interal state array
290 * @param d a 128-bit part of the interal state array
291 * @param mask 128-bit mask
292 * @return output
293 */
294 inline static __m128i mm_recursion (__m128i *a, __m128i *b, __m128i c, __m128i d, __m128i mask)
295 {
296 __m128i v, x, y, z;
297
298 x = _mm_load_si128 (a);
299 y = _mm_srli_epi32 (*b, SR1);
300 z = _mm_srli_si128 (c, SR2);
301 v = _mm_slli_epi32 (d, SL1);
302 z = _mm_xor_si128 (z, x);
303 z = _mm_xor_si128 (z, v);
304 x = _mm_slli_si128 (x, SL2);
305 y = _mm_and_si128 (y, mask);
306 z = _mm_xor_si128 (z, x);
307 z = _mm_xor_si128 (z, y);
308 return z;
309 }
310
311 /**
312 * This function fills the internal state array with pseudorandom
313 * integers.
314 */
315 inline static void
316 gen_rand_all (void)
317 {
318 int i;
319 __m128i r, r1, r2, mask;
320 mask = _mm_set_epi32 (MSK4, MSK3, MSK2, MSK1);
321
322 r1 = _mm_load_si128 (&sfmt[N - 2].si);
323 r2 = _mm_load_si128 (&sfmt[N - 1].si);
324 for (i = 0; i < N - POS1; i++)
325 {
326 r = mm_recursion (&sfmt[i].si, &sfmt[i + POS1].si, r1, r2, mask);
327 _mm_store_si128 (&sfmt[i].si, r);
328 r1 = r2;
329 r2 = r;
330 }
331 for (; i < N; i++)
332 {
333 r = mm_recursion (&sfmt[i].si, &sfmt[i + POS1 - N].si, r1, r2, mask);
334 _mm_store_si128 (&sfmt[i].si, r);
335 r1 = r2;
336 r2 = r;
337 }
338 }
339
340 /**
341 * This function fills the user-specified array with pseudorandom
342 * integers.
343 *
344 * @param array an 128-bit array to be filled by pseudorandom numbers.
345 * @param size number of 128-bit pesudorandom numbers to be generated.
346 */
347 inline static void
348 gen_rand_array (w128_t *array, int size)
349 {
350 int i, j;
351 __m128i r, r1, r2, mask;
352 mask = _mm_set_epi32 (MSK4, MSK3, MSK2, MSK1);
353
354 r1 = _mm_load_si128 (&sfmt[N - 2].si);
355 r2 = _mm_load_si128 (&sfmt[N - 1].si);
356 for (i = 0; i < N - POS1; i++)
357 {
358 r = mm_recursion (&sfmt[i].si, &sfmt[i + POS1].si, r1, r2, mask);
359 _mm_store_si128 (&array[i].si, r);
360 r1 = r2;
361 r2 = r;
362 }
363 for (; i < N; i++)
364 {
365 r = mm_recursion (&sfmt[i].si, &array[i + POS1 - N].si, r1, r2, mask);
366 _mm_store_si128 (&array[i].si, r);
367 r1 = r2;
368 r2 = r;
369 }
370 /* main loop */
371 for (; i < size - N; i++)
372 {
373 r = mm_recursion (&array[i - N].si, &array[i + POS1 - N].si, r1, r2, mask);
374 _mm_store_si128 (&array[i].si, r);
375 r1 = r2;
376 r2 = r;
377 }
378 for (j = 0; j < 2 * N - size; j++)
379 {
380 r = _mm_load_si128 (&array[j + size - N].si);
381 _mm_store_si128 (&sfmt[j].si, r);
382 }
383 for (; i < size; i++)
384 {
385 r = mm_recursion (&array[i - N].si, &array[i + POS1 - N].si, r1, r2, mask);
386 _mm_store_si128 (&array[i].si, r);
387 _mm_store_si128 (&sfmt[j++].si, r);
388 r1 = r2;
389 r2 = r;
390 }
391 }
392 #endif
393
394 /**
395 * This function simulate a 64-bit index of LITTLE ENDIAN
396 * in BIG ENDIAN machine.
397 */
398 #ifdef ONLY64
399 inline static int
400 idxof (int i)
401 {
402 return i ^ 1;
403 }
404 #else
405 inline static int
406 idxof (int i)
407 {
408 return i;
409 }
410 #endif
411 /**
412 * This function simulates SIMD 128-bit right shift by the standard C.
413 * The 128-bit integer given in in is shifted by (shift * 8) bits.
414 * This function simulates the LITTLE ENDIAN SIMD.
415 * @param out the output of this function
416 * @param in the 128-bit data to be shifted
417 * @param shift the shift value
418 */
419 #ifdef ONLY64
420 inline static void
421 rshift128 (w128_t *out, w128_t const *in, int shift)
422 {
423 uint64_t th, tl, oh, ol;
424
425 th = ((uint64_t) in->u[2] << 32) | ((uint64_t) in->u[3]);
426 tl = ((uint64_t) in->u[0] << 32) | ((uint64_t) in->u[1]);
427
428 oh = th >> (shift * 8);
429 ol = tl >> (shift * 8);
430 ol |= th << (64 - shift * 8);
431 out->u[0] = (uint32_t) (ol >> 32);
432 out->u[1] = (uint32_t) ol;
433 out->u[2] = (uint32_t) (oh >> 32);
434 out->u[3] = (uint32_t) oh;
435 }
436 #else
437 inline static void
438 rshift128 (w128_t *out, w128_t const *in, int shift)
439 {
440 uint64_t th, tl, oh, ol;
441
442 th = ((uint64_t) in->u[3] << 32) | ((uint64_t) in->u[2]);
443 tl = ((uint64_t) in->u[1] << 32) | ((uint64_t) in->u[0]);
444
445 oh = th >> (shift * 8);
446 ol = tl >> (shift * 8);
447 ol |= th << (64 - shift * 8);
448 out->u[1] = (uint32_t) (ol >> 32);
449 out->u[0] = (uint32_t) ol;
450 out->u[3] = (uint32_t) (oh >> 32);
451 out->u[2] = (uint32_t) oh;
452 }
453 #endif
454 /**
455 * This function simulates SIMD 128-bit left shift by the standard C.
456 * The 128-bit integer given in in is shifted by (shift * 8) bits.
457 * This function simulates the LITTLE ENDIAN SIMD.
458 * @param out the output of this function
459 * @param in the 128-bit data to be shifted
460 * @param shift the shift value
461 */
462 #ifdef ONLY64
463 inline static void
464 lshift128 (w128_t *out, w128_t const *in, int shift)
465 {
466 uint64_t th, tl, oh, ol;
467
468 th = ((uint64_t) in->u[2] << 32) | ((uint64_t) in->u[3]);
469 tl = ((uint64_t) in->u[0] << 32) | ((uint64_t) in->u[1]);
470
471 oh = th << (shift * 8);
472 ol = tl << (shift * 8);
473 oh |= tl >> (64 - shift * 8);
474 out->u[0] = (uint32_t) (ol >> 32);
475 out->u[1] = (uint32_t) ol;
476 out->u[2] = (uint32_t) (oh >> 32);
477 out->u[3] = (uint32_t) oh;
478 }
479 #else
480 inline static void
481 lshift128 (w128_t *out, w128_t const *in, int shift)
482 {
483 uint64_t th, tl, oh, ol;
484
485 th = ((uint64_t) in->u[3] << 32) | ((uint64_t) in->u[2]);
486 tl = ((uint64_t) in->u[1] << 32) | ((uint64_t) in->u[0]);
487
488 oh = th << (shift * 8);
489 ol = tl << (shift * 8);
490 oh |= tl >> (64 - shift * 8);
491 out->u[1] = (uint32_t) (ol >> 32);
492 out->u[0] = (uint32_t) ol;
493 out->u[3] = (uint32_t) (oh >> 32);
494 out->u[2] = (uint32_t) oh;
495 }
496 #endif
497
498 /**
499 * This function represents the recursion formula.
500 * @param r output
501 * @param a a 128-bit part of the internal state array
502 * @param b a 128-bit part of the internal state array
503 * @param c a 128-bit part of the internal state array
504 * @param d a 128-bit part of the internal state array
505 */
506 #ifdef ONLY64
507 inline static void
508 do_recursion (w128_t *r, w128_t *a, w128_t *b, w128_t *c, w128_t *d)
509 {
510 w128_t x;
511 w128_t y;
512
513 lshift128 (&x, a, SL2);
514 rshift128 (&y, c, SR2);
515 r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK2) ^ y.u[0] ^ (d->u[0] << SL1);
516 r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK1) ^ y.u[1] ^ (d->u[1] << SL1);
517 r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK4) ^ y.u[2] ^ (d->u[2] << SL1);
518 r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK3) ^ y.u[3] ^ (d->u[3] << SL1);
519 }
520 #else
521 inline static void
522 do_recursion (w128_t *r, w128_t *a, w128_t *b, w128_t *c, w128_t *d)
523 {
524 w128_t x;
525 w128_t y;
526
527 lshift128 (&x, a, SL2);
528 rshift128 (&y, c, SR2);
529 r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK1) ^ y.u[0] ^ (d->u[0] << SL1);
530 r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK2) ^ y.u[1] ^ (d->u[1] << SL1);
531 r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK3) ^ y.u[2] ^ (d->u[2] << SL1);
532 r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK4) ^ y.u[3] ^ (d->u[3] << SL1);
533 }
534 #endif
535
536 #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2))
537 /**
538 * This function fills the internal state array with pseudorandom
539 * integers.
540 */
541 inline static void
542 gen_rand_all (void)
543 {
544 int i;
545 w128_t *r1, *r2;
546
547 r1 = &sfmt[N - 2];
548 r2 = &sfmt[N - 1];
549 for (i = 0; i < N - POS1; i++)
550 {
551 do_recursion (&sfmt[i], &sfmt[i], &sfmt[i + POS1], r1, r2);
552 r1 = r2;
553 r2 = &sfmt[i];
554 }
555 for (; i < N; i++)
556 {
557 do_recursion (&sfmt[i], &sfmt[i], &sfmt[i + POS1 - N], r1, r2);
558 r1 = r2;
559 r2 = &sfmt[i];
560 }
561 }
562
563 /**
564 * This function fills the user-specified array with pseudorandom
565 * integers.
566 *
567 * @param array an 128-bit array to be filled by pseudorandom numbers.
568 * @param size number of 128-bit pseudorandom numbers to be generated.
569 */
570 inline static void
571 gen_rand_array (w128_t *array, int size)
572 {
573 int i, j;
574 w128_t *r1, *r2;
575
576 r1 = &sfmt[N - 2];
577 r2 = &sfmt[N - 1];
578 for (i = 0; i < N - POS1; i++)
579 {
580 do_recursion (&array[i], &sfmt[i], &sfmt[i + POS1], r1, r2);
581 r1 = r2;
582 r2 = &array[i];
583 }
584 for (; i < N; i++)
585 {
586 do_recursion (&array[i], &sfmt[i], &array[i + POS1 - N], r1, r2);
587 r1 = r2;
588 r2 = &array[i];
589 }
590 for (; i < size - N; i++)
591 {
592 do_recursion (&array[i], &array[i - N], &array[i + POS1 - N], r1, r2);
593 r1 = r2;
594 r2 = &array[i];
595 }
596 for (j = 0; j < 2 * N - size; j++)
597 {
598 sfmt[j] = array[j + size - N];
599 }
600 for (; i < size; i++, j++)
601 {
602 do_recursion (&array[i], &array[i - N], &array[i + POS1 - N], r1, r2);
603 r1 = r2;
604 r2 = &array[i];
605 sfmt[j] = array[i];
606 }
607 }
608 #endif
609
610 #if defined(BIG_ENDIAN64) && !defined(ONLY64) && !defined(HAVE_ALTIVEC)
611 inline static void
612 swap (w128_t *array, int size)
613 {
614 int i;
615 uint32_t x, y;
616
617 for (i = 0; i < size; i++)
618 {
619 x = array[i].u[0];
620 y = array[i].u[2];
621 array[i].u[0] = array[i].u[1];
622 array[i].u[2] = array[i].u[3];
623 array[i].u[1] = x;
624 array[i].u[3] = y;
625 }
626 }
627 #endif
628 /**
629 * This function represents a function used in the initialization
630 * by init_by_array
631 * @param x 32-bit integer
632 * @return 32-bit integer
633 */
634 inline static uint32_t
635 func1 (uint32_t x)
636 {
637 return (x ^ (x >> 27)) * (uint32_t) 1664525UL;
638 }
639
640 /**
641 * This function represents a function used in the initialization
642 * by init_by_array
643 * @param x 32-bit integer
644 * @return 32-bit integer
645 */
646 inline static uint32_t
647 func2 (uint32_t x)
648 {
649 return (x ^ (x >> 27)) * (uint32_t) 1566083941UL;
650 }
651
652 /**
653 * This function certificate the period of 2^{MEXP}
654 */
655 static void
656 period_certification (void)
657 {
658 int inner = 0;
659 int i, j;
660 uint32_t work;
661
662 for (i = 0; i < 4; i++)
663 inner ^= psfmt32[idxof (i)] & parity[i];
664 for (i = 16; i > 0; i >>= 1)
665 inner ^= inner >> i;
666 inner &= 1;
667 /* check OK */
668 if (inner == 1)
669 {
670 return;
671 }
672 /* check NG, and modification */
673 for (i = 0; i < 4; i++)
674 {
675 work = 1;
676 for (j = 0; j < 32; j++)
677 {
678 if ((work & parity[i]) != 0)
679 {
680 psfmt32[idxof (i)] ^= work;
681 return;
682 }
683 work = work << 1;
684 }
685 }
686 }
687
688 /*----------------
689 PUBLIC FUNCTIONS
690 ----------------*/
691 /**
692 * This function returns the identification string.
693 * The string shows the word size, the Mersenne exponent,
694 * and all parameters of this generator.
695 */
696 char const * const
697 get_idstring (void)
698 {
699 return IDSTR;
700 }
701
702 /**
703 * This function returns the minimum size of array used for \b
704 * fill_array32() function.
705 * @return minimum size of array used for fill_array32() function.
706 */
707 int
708 get_min_array_size32 (void)
709 {
710 return N32;
711 }
712
713 /**
714 * This function returns the minimum size of array used for \b
715 * fill_array64() function.
716 * @return minimum size of array used for fill_array64() function.
717 */
718 int
719 get_min_array_size64 (void)
720 {
721 return N64;
722 }
723
724 #ifndef ONLY64
725 /**
726 * This function generates and returns 32-bit pseudorandom number.
727 * init_gen_rand or init_by_array must be called before this function.
728 * @return 32-bit pseudorandom number
729 */
730 uint32_t
731 gen_rand32 (void)
732 {
733 uint32_t r;
734
735 assert (initialized);
736 if (idx >= N32)
737 {
738 gen_rand_all ();
739 idx = 0;
740 }
741 r = psfmt32[idx++];
742 return r;
743 }
744 #endif
745 /**
746 * This function generates and returns 64-bit pseudorandom number.
747 * init_gen_rand or init_by_array must be called before this function.
748 * The function gen_rand64 should not be called after gen_rand32,
749 * unless an initialization is again executed.
750 * @return 64-bit pseudorandom number
751 */
752 uint64_t
753 gen_rand64 (void)
754 {
755 #if defined(BIG_ENDIAN64) && !defined(ONLY64)
756 uint32_t r1, r2;
757 #else
758 uint64_t r;
759 #endif
760
761 assert (initialized);
762 assert (idx % 2 == 0);
763
764 if (idx >= N32)
765 {
766 gen_rand_all ();
767 idx = 0;
768 }
769 #if defined(BIG_ENDIAN64) && !defined(ONLY64)
770 r1 = psfmt32[idx];
771 r2 = psfmt32[idx + 1];
772 idx += 2;
773 return ((uint64_t) r2 << 32) | r1;
774 #else
775 r = psfmt64[idx / 2];
776 idx += 2;
777 return r;
778 #endif
779 }
780
781 #ifndef ONLY64
782 /**
783 * This function generates pseudorandom 32-bit integers in the
784 * specified array[] by one call. The number of pseudorandom integers
785 * is specified by the argument size, which must be at least 624 and a
786 * multiple of four. The generation by this function is much faster
787 * than the following gen_rand function.
788 *
789 * For initialization, init_gen_rand or init_by_array must be called
790 * before the first call of this function. This function can not be
791 * used after calling gen_rand function, without initialization.
792 *
793 * @param array an array where pseudorandom 32-bit integers are filled
794 * by this function. The pointer to the array must be \b "aligned"
795 * (namely, must be a multiple of 16) in the SIMD version, since it
796 * refers to the address of a 128-bit integer. In the standard C
797 * version, the pointer is arbitrary.
798 *
799 * @param size the number of 32-bit pseudorandom integers to be
800 * generated. size must be a multiple of 4, and greater than or equal
801 * to (MEXP / 128 + 1) * 4.
802 *
803 * @note \b memalign or \b posix_memalign is available to get aligned
804 * memory. Mac OSX doesn't have these functions, but \b malloc of OSX
805 * returns the pointer to the aligned memory block.
806 */
807 void
808 fill_array32 (uint32_t * array, int size)
809 {
810 assert (initialized);
811 assert (idx == N32);
812 assert (size % 4 == 0);
813 assert (size >= N32);
814
815 gen_rand_array (reinterpret_cast<w128_t *> (array), size / 4);
816 idx = N32;
817 }
818 #endif
819
820 /**
821 * This function generates pseudorandom 64-bit integers in the
822 * specified array[] by one call. The number of pseudorandom integers
823 * is specified by the argument size, which must be at least 312 and a
824 * multiple of two. The generation by this function is much faster
825 * than the following gen_rand function.
826 *
827 * For initialization, init_gen_rand or init_by_array must be called
828 * before the first call of this function. This function can not be
829 * used after calling gen_rand function, without initialization.
830 *
831 * @param array an array where pseudorandom 64-bit integers are filled
832 * by this function. The pointer to the array must be "aligned"
833 * (namely, must be a multiple of 16) in the SIMD version, since it
834 * refers to the address of a 128-bit integer. In the standard C
835 * version, the pointer is arbitrary.
836 *
837 * @param size the number of 64-bit pseudorandom integers to be
838 * generated. size must be a multiple of 2, and greater than or equal
839 * to (MEXP / 128 + 1) * 2
840 *
841 * @note \b memalign or \b posix_memalign is available to get aligned
842 * memory. Mac OSX doesn't have these functions, but \b malloc of OSX
843 * returns the pointer to the aligned memory block.
844 */
845 void
846 fill_array64 (uint64_t * array, int size)
847 {
848 assert (initialized);
849 assert (idx == N32);
850 assert (size % 2 == 0);
851 assert (size >= N64);
852
853 gen_rand_array (reinterpret_cast<w128_t *> (array), size / 2);
854 idx = N32;
855
856 #if defined(BIG_ENDIAN64) && !defined(ONLY64)
857 swap (reinterpret_cast<w128_t *> (array), size / 2);
858 #endif
859 }
860
861 /**
862 * This function initializes the internal state array with a 32-bit
863 * integer seed.
864 *
865 * @param seed a 32-bit integer used as the seed.
866 */
867 void
868 init_gen_rand (uint32_t seed)
869 {
870 int i;
871
872 psfmt32[idxof (0)] = seed;
873 for (i = 1; i < N32; i++)
874 {
875 psfmt32[idxof (i)] = 1812433253UL * (psfmt32[idxof (i - 1)] ^ (psfmt32[idxof (i - 1)] >> 30)) + i;
876 }
877 idx = N32;
878 period_certification ();
879 initialized = 1;
880 }
881
882 /**
883 * This function initializes the internal state array,
884 * with an array of 32-bit integers used as the seeds
885 * @param init_key the array of 32-bit integers, used as a seed.
886 * @param key_length the length of init_key.
887 */
888 void
889 init_by_array (uint32_t * init_key, int key_length)
890 {
891 int i, j, count;
892 uint32_t r;
893 int lag;
894 int mid;
895 int size = N * 4;
896
897 if (size >= 623)
898 {
899 lag = 11;
900 }
901 else if (size >= 68)
902 {
903 lag = 7;
904 }
905 else if (size >= 39)
906 {
907 lag = 5;
908 }
909 else
910 {
911 lag = 3;
912 }
913 mid = (size - lag) / 2;
914
915 memset (sfmt, 0x8b, sizeof (sfmt));
916 if (key_length + 1 > N32)
917 {
918 count = key_length + 1;
919 }
920 else
921 {
922 count = N32;
923 }
924 r = func1 (psfmt32[idxof (0)] ^ psfmt32[idxof (mid)] ^ psfmt32[idxof (N32 - 1)]);
925 psfmt32[idxof (mid)] += r;
926 r += key_length;
927 psfmt32[idxof (mid + lag)] += r;
928 psfmt32[idxof (0)] = r;
929
930 count--;
931 for (i = 1, j = 0; (j < count) && (j < key_length); j++)
932 {
933 r = func1 (psfmt32[idxof (i)] ^ psfmt32[idxof ((i + mid) % N32)] ^ psfmt32[idxof ((i + N32 - 1) % N32)]);
934 psfmt32[idxof ((i + mid) % N32)] += r;
935 r += init_key[j] + i;
936 psfmt32[idxof ((i + mid + lag) % N32)] += r;
937 psfmt32[idxof (i)] = r;
938 i = (i + 1) % N32;
939 }
940 for (; j < count; j++)
941 {
942 r = func1 (psfmt32[idxof (i)] ^ psfmt32[idxof ((i + mid) % N32)] ^ psfmt32[idxof ((i + N32 - 1) % N32)]);
943 psfmt32[idxof ((i + mid) % N32)] += r;
944 r += i;
945 psfmt32[idxof ((i + mid + lag) % N32)] += r;
946 psfmt32[idxof (i)] = r;
947 i = (i + 1) % N32;
948 }
949 for (j = 0; j < N32; j++)
950 {
951 r = func2 (psfmt32[idxof (i)] + psfmt32[idxof ((i + mid) % N32)] + psfmt32[idxof ((i + N32 - 1) % N32)]);
952 psfmt32[idxof ((i + mid) % N32)] ^= r;
953 r -= i;
954 psfmt32[idxof ((i + mid + lag) % N32)] ^= r;
955 psfmt32[idxof (i)] = r;
956 i = (i + 1) % N32;
957 }
958
959 idx = N32;
960 period_certification ();
961 initialized = 1;
962 }