ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/ermyth/src/random.C
Revision: 1.1
Committed: Tue Aug 28 17:18:26 2007 UTC (16 years, 9 months ago) by pippijn
Content type: text/plain
Branch: MAIN
Log Message:
added new files

File Contents

# Content
1 /*
2 * random.C: SIMD oriented Fast Mersenne Twister(SFMT)
3 *
4 * Copyright © 2007 Pippijn van Steenhoven / The Ermyth Team
5 * Copyright © 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima
6 * University. All rights reserved.
7 *
8 * @author Mutsuo Saito (Hiroshima University)
9 * @author Makoto Matsumoto (Hiroshima University)
10 *
11 * The new BSD License is applied to this software, see doc/poddoc/license.pod
12 *
13 * $Id$
14 */
15 #include <string.h>
16 #include <assert.h>
17
18 #include <common/random.h>
19
20 /*-----------------
21 BASIC DEFINITIONS
22 -----------------*/
23 /** SFMT generator has an internal state array of 128-bit integers,
24 * and N is its size. */
25 #define N (216091 / 128 + 1)
26 /** N32 is the size of internal state array when regarded as an array
27 * of 32-bit integers.*/
28 #define N32 (N * 4)
29 /** N64 is the size of internal state array when regarded as an array
30 * of 64-bit integers.*/
31 #define N64 (N * 2)
32
33 /*----------------------
34 the parameters of SFMT
35 following definitions are in paramsXXXX.h file.
36 ----------------------*/
37 // the pick up position of the array.
38 #define POS1 627
39 // the parameter of shift left as four 32-bit registers.
40 #define SL1 11
41 // the parameter of shift left as one 128-bit register.
42 // The 128-bit integer is shifted by (SL2 * 8) bits.
43 #define SL2 3
44 // the parameter of shift right as four 32-bit registers.
45 #define SR1 10
46 // the parameter of shift right as one 128-bit register.
47 // The 128-bit integer is shifted by (SL2 * 8) bits.
48 #define SR2 1
49 // A bitmask, used in the recursion. These parameters are introduced
50 // to break symmetry of SIMD.
51 #define MSK1 0xbff7bff7U
52 #define MSK2 0xbfffffffU
53 #define MSK3 0xbffffa7fU
54 #define MSK4 0xffddfbfbU
55 // These definitions are part of a 128-bit period certification vector.
56 #define PARITY1 0xf8000001U
57 #define PARITY2 0x89e80709U
58 #define PARITY3 0x3bd2b64bU
59 #define PARITY4 0x0c64b1e4U
60
61 /* PARAMETERS FOR ALTIVEC */
62 #if defined(__APPLE__) /* For OSX */
63 #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1)
64 #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1)
65 #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
66 #define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
67 #define ALTI_SL2_PERM (vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10)
68 #define ALTI_SL2_PERM64 (vector unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2)
69 #define ALTI_SR2_PERM (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14)
70 #define ALTI_SR2_PERM64 (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14)
71 #else // !__APPLE__
72 #define ALTI_SL1 {SL1, SL1, SL1, SL1}
73 #define ALTI_SR1 {SR1, SR1, SR1, SR1}
74 #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4}
75 #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3}
76 #define ALTI_SL2_PERM { 3,21,21,21, 7, 0, 1, 2,11, 4, 5, 6,15, 8, 9,10}
77 #define ALTI_SL2_PERM64 { 3, 4, 5, 6, 7,29,29,29,11,12,13,14,15, 0, 1, 2}
78 #define ALTI_SR2_PERM { 7, 0, 1, 2,11, 4, 5, 6,15, 8, 9,10,17,12,13,14}
79 #define ALTI_SR2_PERM64 {15, 0, 1, 2, 3, 4, 5, 6,17, 8, 9,10,11,12,13,14}
80 #endif // __APPLE__
81 #define IDSTR "SFMT-216091:627-11-3-10-1:bff7bff7-bfffffff-bffffa7f-ffddfbfb"
82
83 #if defined(__BIG_ENDIAN__) && !defined(__amd64) && !defined(BIG_ENDIAN64)
84 #define BIG_ENDIAN64 1
85 #endif
86 #if defined(HAVE_ALTIVEC) && !defined(BIG_ENDIAN64)
87 #define BIG_ENDIAN64 1
88 #endif
89 #if defined(ONLY64) && !defined(BIG_ENDIAN64)
90 #if defined(__GNUC__)
91 #error "-DONLY64 must be specified with -DBIG_ENDIAN64"
92 #endif
93 #undef ONLY64
94 #endif
95
96 /*--------------------------------------
97 FILE GLOBAL VARIABLES
98 internal state, index counter and flag
99 --------------------------------------*/
100 /** the 128-bit internal state array */
101 static w128_t sfmt[N];
102 /** the 32bit integer pointer to the 128-bit internal state array */
103 static uint32_t *psfmt32 = &sfmt[0].u[0];
104 #if !defined(BIG_ENDIAN64) || defined(ONLY64)
105 /** the 64bit integer pointer to the 128-bit internal state array */
106 static uint64_t *psfmt64 = reinterpret_cast<uint64_t *> (&sfmt[0].u[0]);
107 #endif
108 /** index counter to the 32-bit internal state array */
109 static int idx;
110 /** a flag: it is 0 if and only if the internal state is not yet
111 * initialized. */
112 static int initialized = 0;
113 /** a parity check vector which certificate the period of 2^{MEXP} */
114 static uint32_t parity[4] = { PARITY1, PARITY2, PARITY3, PARITY4 };
115
116 /*----------------
117 STATIC FUNCTIONS
118 ----------------*/
119 inline static int idxof (int i);
120 inline static void rshift128 (w128_t *out, w128_t const *in, int shift);
121 inline static void lshift128 (w128_t *out, w128_t const *in, int shift);
122 inline static void gen_rand_all (void);
123 inline static void gen_rand_array (w128_t *array, int size);
124 inline static uint32_t func1 (uint32_t x);
125 inline static uint32_t func2 (uint32_t x);
126 inline static void period_certification (void);
127 #if defined(BIG_ENDIAN64) && !defined(ONLY64)
128 inline static void swap (w128_t *array, int size);
129 #endif
130
131 #if defined(HAVE_ALTIVEC)
132 inline static vector unsigned int
133 vec_recursion (vector unsigned int a, vector unsigned int b, vector unsigned int c, vector unsigned int d) ALWAYSINLINE;
134
135 /**
136 * This function represents the recursion formula in AltiVec and BIG ENDIAN.
137 * @param a a 128-bit part of the interal state array
138 * @param b a 128-bit part of the interal state array
139 * @param c a 128-bit part of the interal state array
140 * @param d a 128-bit part of the interal state array
141 * @return output
142 */
143 inline static vector unsigned int vec_recursion (vector unsigned int a, vector unsigned int b, vector unsigned int c, vector unsigned int d)
144 {
145 const vector unsigned int sl1 = ALTI_SL1;
146 const vector unsigned int sr1 = ALTI_SR1;
147 #ifdef ONLY64
148 const vector unsigned int mask = ALTI_MSK64;
149 const vector unsigned char perm_sl = ALTI_SL2_PERM64;
150 const vector unsigned char perm_sr = ALTI_SR2_PERM64;
151 #else
152 const vector unsigned int mask = ALTI_MSK;
153 const vector unsigned char perm_sl = ALTI_SL2_PERM;
154 const vector unsigned char perm_sr = ALTI_SR2_PERM;
155 #endif
156 vector unsigned int v, w, x, y, z;
157 x = vec_perm (a, (vector unsigned int) perm_sl, perm_sl);
158 v = a;
159 y = vec_sr (b, sr1);
160 z = vec_perm (c, (vector unsigned int) perm_sr, perm_sr);
161 w = vec_sl (d, sl1);
162 z = vec_xor (z, w);
163 y = vec_and (y, mask);
164 v = vec_xor (v, x);
165 z = vec_xor (z, y);
166 z = vec_xor (z, v);
167 return z;
168 }
169
170 /**
171 * This function fills the internal state array with pseudorandom
172 * integers.
173 */
174 inline static void
175 gen_rand_all (void)
176 {
177 int i;
178 vector unsigned int r, r1, r2;
179
180 r1 = sfmt[N - 2].s;
181 r2 = sfmt[N - 1].s;
182 for (i = 0; i < N - POS1; i++)
183 {
184 r = vec_recursion (sfmt[i].s, sfmt[i + POS1].s, r1, r2);
185 sfmt[i].s = r;
186 r1 = r2;
187 r2 = r;
188 }
189 for (; i < N; i++)
190 {
191 r = vec_recursion (sfmt[i].s, sfmt[i + POS1 - N].s, r1, r2);
192 sfmt[i].s = r;
193 r1 = r2;
194 r2 = r;
195 }
196 }
197
198 /**
199 * This function fills the user-specified array with pseudorandom
200 * integers.
201 *
202 * @param array an 128-bit array to be filled by pseudorandom numbers.
203 * @param size number of 128-bit pesudorandom numbers to be generated.
204 */
205 inline static void
206 gen_rand_array (w128_t *array, int size)
207 {
208 int i, j;
209 vector unsigned int r, r1, r2;
210
211 r1 = sfmt[N - 2].s;
212 r2 = sfmt[N - 1].s;
213 for (i = 0; i < N - POS1; i++)
214 {
215 r = vec_recursion (sfmt[i].s, sfmt[i + POS1].s, r1, r2);
216 array[i].s = r;
217 r1 = r2;
218 r2 = r;
219 }
220 for (; i < N; i++)
221 {
222 r = vec_recursion (sfmt[i].s, array[i + POS1 - N].s, r1, r2);
223 array[i].s = r;
224 r1 = r2;
225 r2 = r;
226 }
227 /* main loop */
228 for (; i < size - N; i++)
229 {
230 r = vec_recursion (array[i - N].s, array[i + POS1 - N].s, r1, r2);
231 array[i].s = r;
232 r1 = r2;
233 r2 = r;
234 }
235 for (j = 0; j < 2 * N - size; j++)
236 {
237 sfmt[j].s = array[j + size - N].s;
238 }
239 for (; i < size; i++)
240 {
241 r = vec_recursion (array[i - N].s, array[i + POS1 - N].s, r1, r2);
242 array[i].s = r;
243 sfmt[j++].s = r;
244 r1 = r2;
245 r2 = r;
246 }
247 }
248
249 #ifndef ONLY64
250 #if defined(__APPLE__)
251 #define ALTI_SWAP (vector unsigned char) \
252 (4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11)
253 #else
254 #define ALTI_SWAP {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11}
255 #endif
256 /**
257 * This function swaps high and low 32-bit of 64-bit integers in user
258 * specified array.
259 *
260 * @param array an 128-bit array to be swaped.
261 * @param size size of 128-bit array.
262 */
263 inline static void
264 swap (w128_t *array, int size)
265 {
266 int i;
267 const vector unsigned char perm = ALTI_SWAP;
268
269 for (i = 0; i < size; i++)
270 {
271 array[i].s = vec_perm (array[i].s, (vector unsigned int) perm, perm);
272 }
273 }
274 #endif
275 #elif defined(HAVE_SSE2)
276 inline static __m128i
277 mm_recursion (__m128i *a, __m128i *b, __m128i c, __m128i d, __m128i mask);
278
279 /**
280 * This function represents the recursion formula.
281 * @param a a 128-bit part of the interal state array
282 * @param b a 128-bit part of the interal state array
283 * @param c a 128-bit part of the interal state array
284 * @param d a 128-bit part of the interal state array
285 * @param mask 128-bit mask
286 * @return output
287 */
288 inline static __m128i mm_recursion (__m128i *a, __m128i *b, __m128i c, __m128i d, __m128i mask)
289 {
290 __m128i v, x, y, z;
291
292 x = _mm_load_si128 (a);
293 y = _mm_srli_epi32 (*b, SR1);
294 z = _mm_srli_si128 (c, SR2);
295 v = _mm_slli_epi32 (d, SL1);
296 z = _mm_xor_si128 (z, x);
297 z = _mm_xor_si128 (z, v);
298 x = _mm_slli_si128 (x, SL2);
299 y = _mm_and_si128 (y, mask);
300 z = _mm_xor_si128 (z, x);
301 z = _mm_xor_si128 (z, y);
302 return z;
303 }
304
305 /**
306 * This function fills the internal state array with pseudorandom
307 * integers.
308 */
309 inline static void
310 gen_rand_all (void)
311 {
312 int i;
313 __m128i r, r1, r2, mask;
314 mask = _mm_set_epi32 (MSK4, MSK3, MSK2, MSK1);
315
316 r1 = _mm_load_si128 (&sfmt[N - 2].si);
317 r2 = _mm_load_si128 (&sfmt[N - 1].si);
318 for (i = 0; i < N - POS1; i++)
319 {
320 r = mm_recursion (&sfmt[i].si, &sfmt[i + POS1].si, r1, r2, mask);
321 _mm_store_si128 (&sfmt[i].si, r);
322 r1 = r2;
323 r2 = r;
324 }
325 for (; i < N; i++)
326 {
327 r = mm_recursion (&sfmt[i].si, &sfmt[i + POS1 - N].si, r1, r2, mask);
328 _mm_store_si128 (&sfmt[i].si, r);
329 r1 = r2;
330 r2 = r;
331 }
332 }
333
334 /**
335 * This function fills the user-specified array with pseudorandom
336 * integers.
337 *
338 * @param array an 128-bit array to be filled by pseudorandom numbers.
339 * @param size number of 128-bit pesudorandom numbers to be generated.
340 */
341 inline static void
342 gen_rand_array (w128_t *array, int size)
343 {
344 int i, j;
345 __m128i r, r1, r2, mask;
346 mask = _mm_set_epi32 (MSK4, MSK3, MSK2, MSK1);
347
348 r1 = _mm_load_si128 (&sfmt[N - 2].si);
349 r2 = _mm_load_si128 (&sfmt[N - 1].si);
350 for (i = 0; i < N - POS1; i++)
351 {
352 r = mm_recursion (&sfmt[i].si, &sfmt[i + POS1].si, r1, r2, mask);
353 _mm_store_si128 (&array[i].si, r);
354 r1 = r2;
355 r2 = r;
356 }
357 for (; i < N; i++)
358 {
359 r = mm_recursion (&sfmt[i].si, &array[i + POS1 - N].si, r1, r2, mask);
360 _mm_store_si128 (&array[i].si, r);
361 r1 = r2;
362 r2 = r;
363 }
364 /* main loop */
365 for (; i < size - N; i++)
366 {
367 r = mm_recursion (&array[i - N].si, &array[i + POS1 - N].si, r1, r2, mask);
368 _mm_store_si128 (&array[i].si, r);
369 r1 = r2;
370 r2 = r;
371 }
372 for (j = 0; j < 2 * N - size; j++)
373 {
374 r = _mm_load_si128 (&array[j + size - N].si);
375 _mm_store_si128 (&sfmt[j].si, r);
376 }
377 for (; i < size; i++)
378 {
379 r = mm_recursion (&array[i - N].si, &array[i + POS1 - N].si, r1, r2, mask);
380 _mm_store_si128 (&array[i].si, r);
381 _mm_store_si128 (&sfmt[j++].si, r);
382 r1 = r2;
383 r2 = r;
384 }
385 }
386 #endif
387
388 /**
389 * This function simulate a 64-bit index of LITTLE ENDIAN
390 * in BIG ENDIAN machine.
391 */
392 #ifdef ONLY64
393 inline static int
394 idxof (int i)
395 {
396 return i ^ 1;
397 }
398 #else
399 inline static int
400 idxof (int i)
401 {
402 return i;
403 }
404 #endif
405 /**
406 * This function simulates SIMD 128-bit right shift by the standard C.
407 * The 128-bit integer given in in is shifted by (shift * 8) bits.
408 * This function simulates the LITTLE ENDIAN SIMD.
409 * @param out the output of this function
410 * @param in the 128-bit data to be shifted
411 * @param shift the shift value
412 */
413 #ifdef ONLY64
414 inline static void
415 rshift128 (w128_t *out, w128_t const *in, int shift)
416 {
417 uint64_t th, tl, oh, ol;
418
419 th = ((uint64_t) in->u[2] << 32) | ((uint64_t) in->u[3]);
420 tl = ((uint64_t) in->u[0] << 32) | ((uint64_t) in->u[1]);
421
422 oh = th >> (shift * 8);
423 ol = tl >> (shift * 8);
424 ol |= th << (64 - shift * 8);
425 out->u[0] = (uint32_t) (ol >> 32);
426 out->u[1] = (uint32_t) ol;
427 out->u[2] = (uint32_t) (oh >> 32);
428 out->u[3] = (uint32_t) oh;
429 }
430 #else
431 inline static void
432 rshift128 (w128_t *out, w128_t const *in, int shift)
433 {
434 uint64_t th, tl, oh, ol;
435
436 th = ((uint64_t) in->u[3] << 32) | ((uint64_t) in->u[2]);
437 tl = ((uint64_t) in->u[1] << 32) | ((uint64_t) in->u[0]);
438
439 oh = th >> (shift * 8);
440 ol = tl >> (shift * 8);
441 ol |= th << (64 - shift * 8);
442 out->u[1] = (uint32_t) (ol >> 32);
443 out->u[0] = (uint32_t) ol;
444 out->u[3] = (uint32_t) (oh >> 32);
445 out->u[2] = (uint32_t) oh;
446 }
447 #endif
448 /**
449 * This function simulates SIMD 128-bit left shift by the standard C.
450 * The 128-bit integer given in in is shifted by (shift * 8) bits.
451 * This function simulates the LITTLE ENDIAN SIMD.
452 * @param out the output of this function
453 * @param in the 128-bit data to be shifted
454 * @param shift the shift value
455 */
456 #ifdef ONLY64
457 inline static void
458 lshift128 (w128_t *out, w128_t const *in, int shift)
459 {
460 uint64_t th, tl, oh, ol;
461
462 th = ((uint64_t) in->u[2] << 32) | ((uint64_t) in->u[3]);
463 tl = ((uint64_t) in->u[0] << 32) | ((uint64_t) in->u[1]);
464
465 oh = th << (shift * 8);
466 ol = tl << (shift * 8);
467 oh |= tl >> (64 - shift * 8);
468 out->u[0] = (uint32_t) (ol >> 32);
469 out->u[1] = (uint32_t) ol;
470 out->u[2] = (uint32_t) (oh >> 32);
471 out->u[3] = (uint32_t) oh;
472 }
473 #else
474 inline static void
475 lshift128 (w128_t *out, w128_t const *in, int shift)
476 {
477 uint64_t th, tl, oh, ol;
478
479 th = ((uint64_t) in->u[3] << 32) | ((uint64_t) in->u[2]);
480 tl = ((uint64_t) in->u[1] << 32) | ((uint64_t) in->u[0]);
481
482 oh = th << (shift * 8);
483 ol = tl << (shift * 8);
484 oh |= tl >> (64 - shift * 8);
485 out->u[1] = (uint32_t) (ol >> 32);
486 out->u[0] = (uint32_t) ol;
487 out->u[3] = (uint32_t) (oh >> 32);
488 out->u[2] = (uint32_t) oh;
489 }
490 #endif
491
492 /**
493 * This function represents the recursion formula.
494 * @param r output
495 * @param a a 128-bit part of the internal state array
496 * @param b a 128-bit part of the internal state array
497 * @param c a 128-bit part of the internal state array
498 * @param d a 128-bit part of the internal state array
499 */
500 #ifdef ONLY64
501 inline static void
502 do_recursion (w128_t *r, w128_t *a, w128_t *b, w128_t *c, w128_t *d)
503 {
504 w128_t x;
505 w128_t y;
506
507 lshift128 (&x, a, SL2);
508 rshift128 (&y, c, SR2);
509 r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK2) ^ y.u[0] ^ (d->u[0] << SL1);
510 r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK1) ^ y.u[1] ^ (d->u[1] << SL1);
511 r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK4) ^ y.u[2] ^ (d->u[2] << SL1);
512 r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK3) ^ y.u[3] ^ (d->u[3] << SL1);
513 }
514 #else
515 inline static void
516 do_recursion (w128_t *r, w128_t *a, w128_t *b, w128_t *c, w128_t *d)
517 {
518 w128_t x;
519 w128_t y;
520
521 lshift128 (&x, a, SL2);
522 rshift128 (&y, c, SR2);
523 r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK1) ^ y.u[0] ^ (d->u[0] << SL1);
524 r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK2) ^ y.u[1] ^ (d->u[1] << SL1);
525 r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK3) ^ y.u[2] ^ (d->u[2] << SL1);
526 r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK4) ^ y.u[3] ^ (d->u[3] << SL1);
527 }
528 #endif
529
530 #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2))
531 /**
532 * This function fills the internal state array with pseudorandom
533 * integers.
534 */
535 inline static void
536 gen_rand_all (void)
537 {
538 int i;
539 w128_t *r1, *r2;
540
541 r1 = &sfmt[N - 2];
542 r2 = &sfmt[N - 1];
543 for (i = 0; i < N - POS1; i++)
544 {
545 do_recursion (&sfmt[i], &sfmt[i], &sfmt[i + POS1], r1, r2);
546 r1 = r2;
547 r2 = &sfmt[i];
548 }
549 for (; i < N; i++)
550 {
551 do_recursion (&sfmt[i], &sfmt[i], &sfmt[i + POS1 - N], r1, r2);
552 r1 = r2;
553 r2 = &sfmt[i];
554 }
555 }
556
557 /**
558 * This function fills the user-specified array with pseudorandom
559 * integers.
560 *
561 * @param array an 128-bit array to be filled by pseudorandom numbers.
562 * @param size number of 128-bit pseudorandom numbers to be generated.
563 */
564 inline static void
565 gen_rand_array (w128_t *array, int size)
566 {
567 int i, j;
568 w128_t *r1, *r2;
569
570 r1 = &sfmt[N - 2];
571 r2 = &sfmt[N - 1];
572 for (i = 0; i < N - POS1; i++)
573 {
574 do_recursion (&array[i], &sfmt[i], &sfmt[i + POS1], r1, r2);
575 r1 = r2;
576 r2 = &array[i];
577 }
578 for (; i < N; i++)
579 {
580 do_recursion (&array[i], &sfmt[i], &array[i + POS1 - N], r1, r2);
581 r1 = r2;
582 r2 = &array[i];
583 }
584 for (; i < size - N; i++)
585 {
586 do_recursion (&array[i], &array[i - N], &array[i + POS1 - N], r1, r2);
587 r1 = r2;
588 r2 = &array[i];
589 }
590 for (j = 0; j < 2 * N - size; j++)
591 {
592 sfmt[j] = array[j + size - N];
593 }
594 for (; i < size; i++, j++)
595 {
596 do_recursion (&array[i], &array[i - N], &array[i + POS1 - N], r1, r2);
597 r1 = r2;
598 r2 = &array[i];
599 sfmt[j] = array[i];
600 }
601 }
602 #endif
603
604 #if defined(BIG_ENDIAN64) && !defined(ONLY64) && !defined(HAVE_ALTIVEC)
605 inline static void
606 swap (w128_t *array, int size)
607 {
608 int i;
609 uint32_t x, y;
610
611 for (i = 0; i < size; i++)
612 {
613 x = array[i].u[0];
614 y = array[i].u[2];
615 array[i].u[0] = array[i].u[1];
616 array[i].u[2] = array[i].u[3];
617 array[i].u[1] = x;
618 array[i].u[3] = y;
619 }
620 }
621 #endif
622 /**
623 * This function represents a function used in the initialization
624 * by init_by_array
625 * @param x 32-bit integer
626 * @return 32-bit integer
627 */
628 inline static uint32_t
629 func1 (uint32_t x)
630 {
631 return (x ^ (x >> 27)) * (uint32_t) 1664525UL;
632 }
633
634 /**
635 * This function represents a function used in the initialization
636 * by init_by_array
637 * @param x 32-bit integer
638 * @return 32-bit integer
639 */
640 inline static uint32_t
641 func2 (uint32_t x)
642 {
643 return (x ^ (x >> 27)) * (uint32_t) 1566083941UL;
644 }
645
646 /**
647 * This function certificate the period of 2^{MEXP}
648 */
649 static void
650 period_certification (void)
651 {
652 int inner = 0;
653 int i, j;
654 uint32_t work;
655
656 for (i = 0; i < 4; i++)
657 inner ^= psfmt32[idxof (i)] & parity[i];
658 for (i = 16; i > 0; i >>= 1)
659 inner ^= inner >> i;
660 inner &= 1;
661 /* check OK */
662 if (inner == 1)
663 {
664 return;
665 }
666 /* check NG, and modification */
667 for (i = 0; i < 4; i++)
668 {
669 work = 1;
670 for (j = 0; j < 32; j++)
671 {
672 if ((work & parity[i]) != 0)
673 {
674 psfmt32[idxof (i)] ^= work;
675 return;
676 }
677 work = work << 1;
678 }
679 }
680 }
681
682 /*----------------
683 PUBLIC FUNCTIONS
684 ----------------*/
685 /**
686 * This function returns the identification string.
687 * The string shows the word size, the Mersenne exponent,
688 * and all parameters of this generator.
689 */
690 char const * const
691 get_idstring (void)
692 {
693 return IDSTR;
694 }
695
696 /**
697 * This function returns the minimum size of array used for \b
698 * fill_array32() function.
699 * @return minimum size of array used for fill_array32() function.
700 */
701 int
702 get_min_array_size32 (void)
703 {
704 return N32;
705 }
706
707 /**
708 * This function returns the minimum size of array used for \b
709 * fill_array64() function.
710 * @return minimum size of array used for fill_array64() function.
711 */
712 int
713 get_min_array_size64 (void)
714 {
715 return N64;
716 }
717
718 #ifndef ONLY64
719 /**
720 * This function generates and returns 32-bit pseudorandom number.
721 * init_gen_rand or init_by_array must be called before this function.
722 * @return 32-bit pseudorandom number
723 */
724 uint32_t
725 gen_rand32 (void)
726 {
727 uint32_t r;
728
729 assert (initialized);
730 if (idx >= N32)
731 {
732 gen_rand_all ();
733 idx = 0;
734 }
735 r = psfmt32[idx++];
736 return r;
737 }
738 #endif
739 /**
740 * This function generates and returns 64-bit pseudorandom number.
741 * init_gen_rand or init_by_array must be called before this function.
742 * The function gen_rand64 should not be called after gen_rand32,
743 * unless an initialization is again executed.
744 * @return 64-bit pseudorandom number
745 */
746 uint64_t
747 gen_rand64 (void)
748 {
749 #if defined(BIG_ENDIAN64) && !defined(ONLY64)
750 uint32_t r1, r2;
751 #else
752 uint64_t r;
753 #endif
754
755 assert (initialized);
756 assert (idx % 2 == 0);
757
758 if (idx >= N32)
759 {
760 gen_rand_all ();
761 idx = 0;
762 }
763 #if defined(BIG_ENDIAN64) && !defined(ONLY64)
764 r1 = psfmt32[idx];
765 r2 = psfmt32[idx + 1];
766 idx += 2;
767 return ((uint64_t) r2 << 32) | r1;
768 #else
769 r = psfmt64[idx / 2];
770 idx += 2;
771 return r;
772 #endif
773 }
774
775 #ifndef ONLY64
776 /**
777 * This function generates pseudorandom 32-bit integers in the
778 * specified array[] by one call. The number of pseudorandom integers
779 * is specified by the argument size, which must be at least 624 and a
780 * multiple of four. The generation by this function is much faster
781 * than the following gen_rand function.
782 *
783 * For initialization, init_gen_rand or init_by_array must be called
784 * before the first call of this function. This function can not be
785 * used after calling gen_rand function, without initialization.
786 *
787 * @param array an array where pseudorandom 32-bit integers are filled
788 * by this function. The pointer to the array must be \b "aligned"
789 * (namely, must be a multiple of 16) in the SIMD version, since it
790 * refers to the address of a 128-bit integer. In the standard C
791 * version, the pointer is arbitrary.
792 *
793 * @param size the number of 32-bit pseudorandom integers to be
794 * generated. size must be a multiple of 4, and greater than or equal
795 * to (MEXP / 128 + 1) * 4.
796 *
797 * @note \b memalign or \b posix_memalign is available to get aligned
798 * memory. Mac OSX doesn't have these functions, but \b malloc of OSX
799 * returns the pointer to the aligned memory block.
800 */
801 void
802 fill_array32 (uint32_t * array, int size)
803 {
804 assert (initialized);
805 assert (idx == N32);
806 assert (size % 4 == 0);
807 assert (size >= N32);
808
809 gen_rand_array (reinterpret_cast<w128_t *> (array), size / 4);
810 idx = N32;
811 }
812 #endif
813
814 /**
815 * This function generates pseudorandom 64-bit integers in the
816 * specified array[] by one call. The number of pseudorandom integers
817 * is specified by the argument size, which must be at least 312 and a
818 * multiple of two. The generation by this function is much faster
819 * than the following gen_rand function.
820 *
821 * For initialization, init_gen_rand or init_by_array must be called
822 * before the first call of this function. This function can not be
823 * used after calling gen_rand function, without initialization.
824 *
825 * @param array an array where pseudorandom 64-bit integers are filled
826 * by this function. The pointer to the array must be "aligned"
827 * (namely, must be a multiple of 16) in the SIMD version, since it
828 * refers to the address of a 128-bit integer. In the standard C
829 * version, the pointer is arbitrary.
830 *
831 * @param size the number of 64-bit pseudorandom integers to be
832 * generated. size must be a multiple of 2, and greater than or equal
833 * to (MEXP / 128 + 1) * 2
834 *
835 * @note \b memalign or \b posix_memalign is available to get aligned
836 * memory. Mac OSX doesn't have these functions, but \b malloc of OSX
837 * returns the pointer to the aligned memory block.
838 */
839 void
840 fill_array64 (uint64_t * array, int size)
841 {
842 assert (initialized);
843 assert (idx == N32);
844 assert (size % 2 == 0);
845 assert (size >= N64);
846
847 gen_rand_array (reinterpret_cast<w128_t *> (array), size / 2);
848 idx = N32;
849
850 #if defined(BIG_ENDIAN64) && !defined(ONLY64)
851 swap (reinterpret_cast<w128_t *> (array), size / 2);
852 #endif
853 }
854
855 /**
856 * This function initializes the internal state array with a 32-bit
857 * integer seed.
858 *
859 * @param seed a 32-bit integer used as the seed.
860 */
861 void
862 init_gen_rand (uint32_t seed)
863 {
864 int i;
865
866 psfmt32[idxof (0)] = seed;
867 for (i = 1; i < N32; i++)
868 {
869 psfmt32[idxof (i)] = 1812433253UL * (psfmt32[idxof (i - 1)] ^ (psfmt32[idxof (i - 1)] >> 30)) + i;
870 }
871 idx = N32;
872 period_certification ();
873 initialized = 1;
874 }
875
876 /**
877 * This function initializes the internal state array,
878 * with an array of 32-bit integers used as the seeds
879 * @param init_key the array of 32-bit integers, used as a seed.
880 * @param key_length the length of init_key.
881 */
882 void
883 init_by_array (uint32_t * init_key, int key_length)
884 {
885 int i, j, count;
886 uint32_t r;
887 int lag;
888 int mid;
889 int size = N * 4;
890
891 if (size >= 623)
892 {
893 lag = 11;
894 }
895 else if (size >= 68)
896 {
897 lag = 7;
898 }
899 else if (size >= 39)
900 {
901 lag = 5;
902 }
903 else
904 {
905 lag = 3;
906 }
907 mid = (size - lag) / 2;
908
909 memset (sfmt, 0x8b, sizeof (sfmt));
910 if (key_length + 1 > N32)
911 {
912 count = key_length + 1;
913 }
914 else
915 {
916 count = N32;
917 }
918 r = func1 (psfmt32[idxof (0)] ^ psfmt32[idxof (mid)] ^ psfmt32[idxof (N32 - 1)]);
919 psfmt32[idxof (mid)] += r;
920 r += key_length;
921 psfmt32[idxof (mid + lag)] += r;
922 psfmt32[idxof (0)] = r;
923
924 count--;
925 for (i = 1, j = 0; (j < count) && (j < key_length); j++)
926 {
927 r = func1 (psfmt32[idxof (i)] ^ psfmt32[idxof ((i + mid) % N32)] ^ psfmt32[idxof ((i + N32 - 1) % N32)]);
928 psfmt32[idxof ((i + mid) % N32)] += r;
929 r += init_key[j] + i;
930 psfmt32[idxof ((i + mid + lag) % N32)] += r;
931 psfmt32[idxof (i)] = r;
932 i = (i + 1) % N32;
933 }
934 for (; j < count; j++)
935 {
936 r = func1 (psfmt32[idxof (i)] ^ psfmt32[idxof ((i + mid) % N32)] ^ psfmt32[idxof ((i + N32 - 1) % N32)]);
937 psfmt32[idxof ((i + mid) % N32)] += r;
938 r += i;
939 psfmt32[idxof ((i + mid + lag) % N32)] += r;
940 psfmt32[idxof (i)] = r;
941 i = (i + 1) % N32;
942 }
943 for (j = 0; j < N32; j++)
944 {
945 r = func2 (psfmt32[idxof (i)] + psfmt32[idxof ((i + mid) % N32)] + psfmt32[idxof ((i + N32 - 1) % N32)]);
946 psfmt32[idxof ((i + mid) % N32)] ^= r;
947 r -= i;
948 psfmt32[idxof ((i + mid + lag) % N32)] ^= r;
949 psfmt32[idxof (i)] = r;
950 i = (i + 1) % N32;
951 }
952
953 idx = N32;
954 period_certification ();
955 initialized = 1;
956 }