ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/ermyth/src/random.C
Revision: 1.4
Committed: Sat Sep 22 14:27:30 2007 UTC (16 years, 8 months ago) by pippijn
Content type: text/plain
Branch: MAIN
CVS Tags: HEAD
Changes since 1.3: +1 -1 lines
State: FILE REMOVED
Log Message:
split up ermyth into ermyth-modules, libermyth (currently just ermyth-util) and ermyth-core

File Contents

# User Rev Content
1 pippijn 1.2 /**
2 pippijn 1.1 * random.C: SIMD oriented Fast Mersenne Twister(SFMT)
3     *
4 pippijn 1.3 * Copyright © 2007 Pippijn van Steenhoven / The Ermyth Team
5     * Rights to this code are as documented in COPYING.
6     *
7     *
8     * Portions of this file were derived from sources bearing the following license:
9     *
10 pippijn 1.1 * Copyright © 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima
11     * University. All rights reserved.
12     *
13     * @author Mutsuo Saito (Hiroshima University)
14     * @author Makoto Matsumoto (Hiroshima University)
15     *
16 pippijn 1.3 * The new BSD License is applied to this software.
17 pippijn 1.1 */
18 pippijn 1.2
19 pippijn 1.4 static char const rcsid[] = "$Id: random.C,v 1.3 2007-09-16 18:54:45 pippijn Exp $";
20 pippijn 1.2
21 pippijn 1.1 #include <string.h>
22     #include <assert.h>
23    
24     #include <common/random.h>
25    
26     /*-----------------
27     BASIC DEFINITIONS
28     -----------------*/
29     /** SFMT generator has an internal state array of 128-bit integers,
30     * and N is its size. */
31     #define N (216091 / 128 + 1)
32     /** N32 is the size of internal state array when regarded as an array
33     * of 32-bit integers.*/
34     #define N32 (N * 4)
35     /** N64 is the size of internal state array when regarded as an array
36     * of 64-bit integers.*/
37     #define N64 (N * 2)
38    
39     /*----------------------
40     the parameters of SFMT
41     following definitions are in paramsXXXX.h file.
42     ----------------------*/
43     // the pick up position of the array.
44     #define POS1 627
45     // the parameter of shift left as four 32-bit registers.
46     #define SL1 11
47     // the parameter of shift left as one 128-bit register.
48     // The 128-bit integer is shifted by (SL2 * 8) bits.
49     #define SL2 3
50     // the parameter of shift right as four 32-bit registers.
51     #define SR1 10
52     // the parameter of shift right as one 128-bit register.
53     // The 128-bit integer is shifted by (SL2 * 8) bits.
54     #define SR2 1
55     // A bitmask, used in the recursion. These parameters are introduced
56     // to break symmetry of SIMD.
57     #define MSK1 0xbff7bff7U
58     #define MSK2 0xbfffffffU
59     #define MSK3 0xbffffa7fU
60     #define MSK4 0xffddfbfbU
61     // These definitions are part of a 128-bit period certification vector.
62     #define PARITY1 0xf8000001U
63     #define PARITY2 0x89e80709U
64     #define PARITY3 0x3bd2b64bU
65     #define PARITY4 0x0c64b1e4U
66    
67     /* PARAMETERS FOR ALTIVEC */
68     #if defined(__APPLE__) /* For OSX */
69     #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1)
70     #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1)
71     #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
72     #define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
73     #define ALTI_SL2_PERM (vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10)
74     #define ALTI_SL2_PERM64 (vector unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2)
75     #define ALTI_SR2_PERM (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14)
76     #define ALTI_SR2_PERM64 (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14)
77     #else // !__APPLE__
78     #define ALTI_SL1 {SL1, SL1, SL1, SL1}
79     #define ALTI_SR1 {SR1, SR1, SR1, SR1}
80     #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4}
81     #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3}
82     #define ALTI_SL2_PERM { 3,21,21,21, 7, 0, 1, 2,11, 4, 5, 6,15, 8, 9,10}
83     #define ALTI_SL2_PERM64 { 3, 4, 5, 6, 7,29,29,29,11,12,13,14,15, 0, 1, 2}
84     #define ALTI_SR2_PERM { 7, 0, 1, 2,11, 4, 5, 6,15, 8, 9,10,17,12,13,14}
85     #define ALTI_SR2_PERM64 {15, 0, 1, 2, 3, 4, 5, 6,17, 8, 9,10,11,12,13,14}
86     #endif // __APPLE__
87     #define IDSTR "SFMT-216091:627-11-3-10-1:bff7bff7-bfffffff-bffffa7f-ffddfbfb"
88    
89     #if defined(__BIG_ENDIAN__) && !defined(__amd64) && !defined(BIG_ENDIAN64)
90     #define BIG_ENDIAN64 1
91     #endif
92     #if defined(HAVE_ALTIVEC) && !defined(BIG_ENDIAN64)
93     #define BIG_ENDIAN64 1
94     #endif
95     #if defined(ONLY64) && !defined(BIG_ENDIAN64)
96     #if defined(__GNUC__)
97     #error "-DONLY64 must be specified with -DBIG_ENDIAN64"
98     #endif
99     #undef ONLY64
100     #endif
101    
102     /*--------------------------------------
103     FILE GLOBAL VARIABLES
104     internal state, index counter and flag
105     --------------------------------------*/
106     /** the 128-bit internal state array */
107     static w128_t sfmt[N];
108     /** the 32bit integer pointer to the 128-bit internal state array */
109     static uint32_t *psfmt32 = &sfmt[0].u[0];
110     #if !defined(BIG_ENDIAN64) || defined(ONLY64)
111     /** the 64bit integer pointer to the 128-bit internal state array */
112     static uint64_t *psfmt64 = reinterpret_cast<uint64_t *> (&sfmt[0].u[0]);
113     #endif
114     /** index counter to the 32-bit internal state array */
115     static int idx;
116     /** a flag: it is 0 if and only if the internal state is not yet
117     * initialized. */
118     static int initialized = 0;
119     /** a parity check vector which certificate the period of 2^{MEXP} */
120     static uint32_t parity[4] = { PARITY1, PARITY2, PARITY3, PARITY4 };
121    
122     /*----------------
123     STATIC FUNCTIONS
124     ----------------*/
125     inline static int idxof (int i);
126     inline static void rshift128 (w128_t *out, w128_t const *in, int shift);
127     inline static void lshift128 (w128_t *out, w128_t const *in, int shift);
128     inline static void gen_rand_all (void);
129     inline static void gen_rand_array (w128_t *array, int size);
130     inline static uint32_t func1 (uint32_t x);
131     inline static uint32_t func2 (uint32_t x);
132     inline static void period_certification (void);
133     #if defined(BIG_ENDIAN64) && !defined(ONLY64)
134     inline static void swap (w128_t *array, int size);
135     #endif
136    
137     #if defined(HAVE_ALTIVEC)
138     inline static vector unsigned int
139     vec_recursion (vector unsigned int a, vector unsigned int b, vector unsigned int c, vector unsigned int d) ALWAYSINLINE;
140    
141     /**
142     * This function represents the recursion formula in AltiVec and BIG ENDIAN.
143     * @param a a 128-bit part of the interal state array
144     * @param b a 128-bit part of the interal state array
145     * @param c a 128-bit part of the interal state array
146     * @param d a 128-bit part of the interal state array
147     * @return output
148     */
149     inline static vector unsigned int vec_recursion (vector unsigned int a, vector unsigned int b, vector unsigned int c, vector unsigned int d)
150     {
151     const vector unsigned int sl1 = ALTI_SL1;
152     const vector unsigned int sr1 = ALTI_SR1;
153     #ifdef ONLY64
154     const vector unsigned int mask = ALTI_MSK64;
155     const vector unsigned char perm_sl = ALTI_SL2_PERM64;
156     const vector unsigned char perm_sr = ALTI_SR2_PERM64;
157     #else
158     const vector unsigned int mask = ALTI_MSK;
159     const vector unsigned char perm_sl = ALTI_SL2_PERM;
160     const vector unsigned char perm_sr = ALTI_SR2_PERM;
161     #endif
162     vector unsigned int v, w, x, y, z;
163     x = vec_perm (a, (vector unsigned int) perm_sl, perm_sl);
164     v = a;
165     y = vec_sr (b, sr1);
166     z = vec_perm (c, (vector unsigned int) perm_sr, perm_sr);
167     w = vec_sl (d, sl1);
168     z = vec_xor (z, w);
169     y = vec_and (y, mask);
170     v = vec_xor (v, x);
171     z = vec_xor (z, y);
172     z = vec_xor (z, v);
173     return z;
174     }
175    
176     /**
177     * This function fills the internal state array with pseudorandom
178     * integers.
179     */
180     inline static void
181     gen_rand_all (void)
182     {
183     int i;
184     vector unsigned int r, r1, r2;
185    
186     r1 = sfmt[N - 2].s;
187     r2 = sfmt[N - 1].s;
188     for (i = 0; i < N - POS1; i++)
189     {
190     r = vec_recursion (sfmt[i].s, sfmt[i + POS1].s, r1, r2);
191     sfmt[i].s = r;
192     r1 = r2;
193     r2 = r;
194     }
195     for (; i < N; i++)
196     {
197     r = vec_recursion (sfmt[i].s, sfmt[i + POS1 - N].s, r1, r2);
198     sfmt[i].s = r;
199     r1 = r2;
200     r2 = r;
201     }
202     }
203    
204     /**
205     * This function fills the user-specified array with pseudorandom
206     * integers.
207     *
208     * @param array an 128-bit array to be filled by pseudorandom numbers.
209     * @param size number of 128-bit pesudorandom numbers to be generated.
210     */
211     inline static void
212     gen_rand_array (w128_t *array, int size)
213     {
214     int i, j;
215     vector unsigned int r, r1, r2;
216    
217     r1 = sfmt[N - 2].s;
218     r2 = sfmt[N - 1].s;
219     for (i = 0; i < N - POS1; i++)
220     {
221     r = vec_recursion (sfmt[i].s, sfmt[i + POS1].s, r1, r2);
222     array[i].s = r;
223     r1 = r2;
224     r2 = r;
225     }
226     for (; i < N; i++)
227     {
228     r = vec_recursion (sfmt[i].s, array[i + POS1 - N].s, r1, r2);
229     array[i].s = r;
230     r1 = r2;
231     r2 = r;
232     }
233     /* main loop */
234     for (; i < size - N; i++)
235     {
236     r = vec_recursion (array[i - N].s, array[i + POS1 - N].s, r1, r2);
237     array[i].s = r;
238     r1 = r2;
239     r2 = r;
240     }
241     for (j = 0; j < 2 * N - size; j++)
242     {
243     sfmt[j].s = array[j + size - N].s;
244     }
245     for (; i < size; i++)
246     {
247     r = vec_recursion (array[i - N].s, array[i + POS1 - N].s, r1, r2);
248     array[i].s = r;
249     sfmt[j++].s = r;
250     r1 = r2;
251     r2 = r;
252     }
253     }
254    
255     #ifndef ONLY64
256     #if defined(__APPLE__)
257     #define ALTI_SWAP (vector unsigned char) \
258     (4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11)
259     #else
260     #define ALTI_SWAP {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11}
261     #endif
262     /**
263     * This function swaps high and low 32-bit of 64-bit integers in user
264     * specified array.
265     *
266     * @param array an 128-bit array to be swaped.
267     * @param size size of 128-bit array.
268     */
269     inline static void
270     swap (w128_t *array, int size)
271     {
272     int i;
273     const vector unsigned char perm = ALTI_SWAP;
274    
275     for (i = 0; i < size; i++)
276     {
277     array[i].s = vec_perm (array[i].s, (vector unsigned int) perm, perm);
278     }
279     }
280     #endif
281     #elif defined(HAVE_SSE2)
282     inline static __m128i
283     mm_recursion (__m128i *a, __m128i *b, __m128i c, __m128i d, __m128i mask);
284    
285     /**
286     * This function represents the recursion formula.
287     * @param a a 128-bit part of the interal state array
288     * @param b a 128-bit part of the interal state array
289     * @param c a 128-bit part of the interal state array
290     * @param d a 128-bit part of the interal state array
291     * @param mask 128-bit mask
292     * @return output
293     */
294     inline static __m128i mm_recursion (__m128i *a, __m128i *b, __m128i c, __m128i d, __m128i mask)
295     {
296     __m128i v, x, y, z;
297    
298     x = _mm_load_si128 (a);
299     y = _mm_srli_epi32 (*b, SR1);
300     z = _mm_srli_si128 (c, SR2);
301     v = _mm_slli_epi32 (d, SL1);
302     z = _mm_xor_si128 (z, x);
303     z = _mm_xor_si128 (z, v);
304     x = _mm_slli_si128 (x, SL2);
305     y = _mm_and_si128 (y, mask);
306     z = _mm_xor_si128 (z, x);
307     z = _mm_xor_si128 (z, y);
308     return z;
309     }
310    
311     /**
312     * This function fills the internal state array with pseudorandom
313     * integers.
314     */
315     inline static void
316     gen_rand_all (void)
317     {
318     int i;
319     __m128i r, r1, r2, mask;
320     mask = _mm_set_epi32 (MSK4, MSK3, MSK2, MSK1);
321    
322     r1 = _mm_load_si128 (&sfmt[N - 2].si);
323     r2 = _mm_load_si128 (&sfmt[N - 1].si);
324     for (i = 0; i < N - POS1; i++)
325     {
326     r = mm_recursion (&sfmt[i].si, &sfmt[i + POS1].si, r1, r2, mask);
327     _mm_store_si128 (&sfmt[i].si, r);
328     r1 = r2;
329     r2 = r;
330     }
331     for (; i < N; i++)
332     {
333     r = mm_recursion (&sfmt[i].si, &sfmt[i + POS1 - N].si, r1, r2, mask);
334     _mm_store_si128 (&sfmt[i].si, r);
335     r1 = r2;
336     r2 = r;
337     }
338     }
339    
340     /**
341     * This function fills the user-specified array with pseudorandom
342     * integers.
343     *
344     * @param array an 128-bit array to be filled by pseudorandom numbers.
345     * @param size number of 128-bit pesudorandom numbers to be generated.
346     */
347     inline static void
348     gen_rand_array (w128_t *array, int size)
349     {
350     int i, j;
351     __m128i r, r1, r2, mask;
352     mask = _mm_set_epi32 (MSK4, MSK3, MSK2, MSK1);
353    
354     r1 = _mm_load_si128 (&sfmt[N - 2].si);
355     r2 = _mm_load_si128 (&sfmt[N - 1].si);
356     for (i = 0; i < N - POS1; i++)
357     {
358     r = mm_recursion (&sfmt[i].si, &sfmt[i + POS1].si, r1, r2, mask);
359     _mm_store_si128 (&array[i].si, r);
360     r1 = r2;
361     r2 = r;
362     }
363     for (; i < N; i++)
364     {
365     r = mm_recursion (&sfmt[i].si, &array[i + POS1 - N].si, r1, r2, mask);
366     _mm_store_si128 (&array[i].si, r);
367     r1 = r2;
368     r2 = r;
369     }
370     /* main loop */
371     for (; i < size - N; i++)
372     {
373     r = mm_recursion (&array[i - N].si, &array[i + POS1 - N].si, r1, r2, mask);
374     _mm_store_si128 (&array[i].si, r);
375     r1 = r2;
376     r2 = r;
377     }
378     for (j = 0; j < 2 * N - size; j++)
379     {
380     r = _mm_load_si128 (&array[j + size - N].si);
381     _mm_store_si128 (&sfmt[j].si, r);
382     }
383     for (; i < size; i++)
384     {
385     r = mm_recursion (&array[i - N].si, &array[i + POS1 - N].si, r1, r2, mask);
386     _mm_store_si128 (&array[i].si, r);
387     _mm_store_si128 (&sfmt[j++].si, r);
388     r1 = r2;
389     r2 = r;
390     }
391     }
392     #endif
393    
394     /**
395     * This function simulate a 64-bit index of LITTLE ENDIAN
396     * in BIG ENDIAN machine.
397     */
398     #ifdef ONLY64
399     inline static int
400     idxof (int i)
401     {
402     return i ^ 1;
403     }
404     #else
405     inline static int
406     idxof (int i)
407     {
408     return i;
409     }
410     #endif
411     /**
412     * This function simulates SIMD 128-bit right shift by the standard C.
413     * The 128-bit integer given in in is shifted by (shift * 8) bits.
414     * This function simulates the LITTLE ENDIAN SIMD.
415     * @param out the output of this function
416     * @param in the 128-bit data to be shifted
417     * @param shift the shift value
418     */
419     #ifdef ONLY64
420     inline static void
421     rshift128 (w128_t *out, w128_t const *in, int shift)
422     {
423     uint64_t th, tl, oh, ol;
424    
425     th = ((uint64_t) in->u[2] << 32) | ((uint64_t) in->u[3]);
426     tl = ((uint64_t) in->u[0] << 32) | ((uint64_t) in->u[1]);
427    
428     oh = th >> (shift * 8);
429     ol = tl >> (shift * 8);
430     ol |= th << (64 - shift * 8);
431     out->u[0] = (uint32_t) (ol >> 32);
432     out->u[1] = (uint32_t) ol;
433     out->u[2] = (uint32_t) (oh >> 32);
434     out->u[3] = (uint32_t) oh;
435     }
436     #else
437     inline static void
438     rshift128 (w128_t *out, w128_t const *in, int shift)
439     {
440     uint64_t th, tl, oh, ol;
441    
442     th = ((uint64_t) in->u[3] << 32) | ((uint64_t) in->u[2]);
443     tl = ((uint64_t) in->u[1] << 32) | ((uint64_t) in->u[0]);
444    
445     oh = th >> (shift * 8);
446     ol = tl >> (shift * 8);
447     ol |= th << (64 - shift * 8);
448     out->u[1] = (uint32_t) (ol >> 32);
449     out->u[0] = (uint32_t) ol;
450     out->u[3] = (uint32_t) (oh >> 32);
451     out->u[2] = (uint32_t) oh;
452     }
453     #endif
454     /**
455     * This function simulates SIMD 128-bit left shift by the standard C.
456     * The 128-bit integer given in in is shifted by (shift * 8) bits.
457     * This function simulates the LITTLE ENDIAN SIMD.
458     * @param out the output of this function
459     * @param in the 128-bit data to be shifted
460     * @param shift the shift value
461     */
462     #ifdef ONLY64
463     inline static void
464     lshift128 (w128_t *out, w128_t const *in, int shift)
465     {
466     uint64_t th, tl, oh, ol;
467    
468     th = ((uint64_t) in->u[2] << 32) | ((uint64_t) in->u[3]);
469     tl = ((uint64_t) in->u[0] << 32) | ((uint64_t) in->u[1]);
470    
471     oh = th << (shift * 8);
472     ol = tl << (shift * 8);
473     oh |= tl >> (64 - shift * 8);
474     out->u[0] = (uint32_t) (ol >> 32);
475     out->u[1] = (uint32_t) ol;
476     out->u[2] = (uint32_t) (oh >> 32);
477     out->u[3] = (uint32_t) oh;
478     }
479     #else
480     inline static void
481     lshift128 (w128_t *out, w128_t const *in, int shift)
482     {
483     uint64_t th, tl, oh, ol;
484    
485     th = ((uint64_t) in->u[3] << 32) | ((uint64_t) in->u[2]);
486     tl = ((uint64_t) in->u[1] << 32) | ((uint64_t) in->u[0]);
487    
488     oh = th << (shift * 8);
489     ol = tl << (shift * 8);
490     oh |= tl >> (64 - shift * 8);
491     out->u[1] = (uint32_t) (ol >> 32);
492     out->u[0] = (uint32_t) ol;
493     out->u[3] = (uint32_t) (oh >> 32);
494     out->u[2] = (uint32_t) oh;
495     }
496     #endif
497    
498     /**
499     * This function represents the recursion formula.
500     * @param r output
501     * @param a a 128-bit part of the internal state array
502     * @param b a 128-bit part of the internal state array
503     * @param c a 128-bit part of the internal state array
504     * @param d a 128-bit part of the internal state array
505     */
506     #ifdef ONLY64
507     inline static void
508     do_recursion (w128_t *r, w128_t *a, w128_t *b, w128_t *c, w128_t *d)
509     {
510     w128_t x;
511     w128_t y;
512    
513     lshift128 (&x, a, SL2);
514     rshift128 (&y, c, SR2);
515     r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK2) ^ y.u[0] ^ (d->u[0] << SL1);
516     r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK1) ^ y.u[1] ^ (d->u[1] << SL1);
517     r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK4) ^ y.u[2] ^ (d->u[2] << SL1);
518     r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK3) ^ y.u[3] ^ (d->u[3] << SL1);
519     }
520     #else
521     inline static void
522     do_recursion (w128_t *r, w128_t *a, w128_t *b, w128_t *c, w128_t *d)
523     {
524     w128_t x;
525     w128_t y;
526    
527     lshift128 (&x, a, SL2);
528     rshift128 (&y, c, SR2);
529     r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK1) ^ y.u[0] ^ (d->u[0] << SL1);
530     r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK2) ^ y.u[1] ^ (d->u[1] << SL1);
531     r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK3) ^ y.u[2] ^ (d->u[2] << SL1);
532     r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK4) ^ y.u[3] ^ (d->u[3] << SL1);
533     }
534     #endif
535    
536     #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2))
537     /**
538     * This function fills the internal state array with pseudorandom
539     * integers.
540     */
541     inline static void
542     gen_rand_all (void)
543     {
544     int i;
545     w128_t *r1, *r2;
546    
547     r1 = &sfmt[N - 2];
548     r2 = &sfmt[N - 1];
549     for (i = 0; i < N - POS1; i++)
550     {
551     do_recursion (&sfmt[i], &sfmt[i], &sfmt[i + POS1], r1, r2);
552     r1 = r2;
553     r2 = &sfmt[i];
554     }
555     for (; i < N; i++)
556     {
557     do_recursion (&sfmt[i], &sfmt[i], &sfmt[i + POS1 - N], r1, r2);
558     r1 = r2;
559     r2 = &sfmt[i];
560     }
561     }
562    
563     /**
564     * This function fills the user-specified array with pseudorandom
565     * integers.
566     *
567     * @param array an 128-bit array to be filled by pseudorandom numbers.
568     * @param size number of 128-bit pseudorandom numbers to be generated.
569     */
570     inline static void
571     gen_rand_array (w128_t *array, int size)
572     {
573     int i, j;
574     w128_t *r1, *r2;
575    
576     r1 = &sfmt[N - 2];
577     r2 = &sfmt[N - 1];
578     for (i = 0; i < N - POS1; i++)
579     {
580     do_recursion (&array[i], &sfmt[i], &sfmt[i + POS1], r1, r2);
581     r1 = r2;
582     r2 = &array[i];
583     }
584     for (; i < N; i++)
585     {
586     do_recursion (&array[i], &sfmt[i], &array[i + POS1 - N], r1, r2);
587     r1 = r2;
588     r2 = &array[i];
589     }
590     for (; i < size - N; i++)
591     {
592     do_recursion (&array[i], &array[i - N], &array[i + POS1 - N], r1, r2);
593     r1 = r2;
594     r2 = &array[i];
595     }
596     for (j = 0; j < 2 * N - size; j++)
597     {
598     sfmt[j] = array[j + size - N];
599     }
600     for (; i < size; i++, j++)
601     {
602     do_recursion (&array[i], &array[i - N], &array[i + POS1 - N], r1, r2);
603     r1 = r2;
604     r2 = &array[i];
605     sfmt[j] = array[i];
606     }
607     }
608     #endif
609    
610     #if defined(BIG_ENDIAN64) && !defined(ONLY64) && !defined(HAVE_ALTIVEC)
611     inline static void
612     swap (w128_t *array, int size)
613     {
614     int i;
615     uint32_t x, y;
616    
617     for (i = 0; i < size; i++)
618     {
619     x = array[i].u[0];
620     y = array[i].u[2];
621     array[i].u[0] = array[i].u[1];
622     array[i].u[2] = array[i].u[3];
623     array[i].u[1] = x;
624     array[i].u[3] = y;
625     }
626     }
627     #endif
628     /**
629     * This function represents a function used in the initialization
630     * by init_by_array
631     * @param x 32-bit integer
632     * @return 32-bit integer
633     */
634     inline static uint32_t
635     func1 (uint32_t x)
636     {
637     return (x ^ (x >> 27)) * (uint32_t) 1664525UL;
638     }
639    
640     /**
641     * This function represents a function used in the initialization
642     * by init_by_array
643     * @param x 32-bit integer
644     * @return 32-bit integer
645     */
646     inline static uint32_t
647     func2 (uint32_t x)
648     {
649     return (x ^ (x >> 27)) * (uint32_t) 1566083941UL;
650     }
651    
652     /**
653     * This function certificate the period of 2^{MEXP}
654     */
655     static void
656     period_certification (void)
657     {
658     int inner = 0;
659     int i, j;
660     uint32_t work;
661    
662     for (i = 0; i < 4; i++)
663     inner ^= psfmt32[idxof (i)] & parity[i];
664     for (i = 16; i > 0; i >>= 1)
665     inner ^= inner >> i;
666     inner &= 1;
667     /* check OK */
668     if (inner == 1)
669     {
670     return;
671     }
672     /* check NG, and modification */
673     for (i = 0; i < 4; i++)
674     {
675     work = 1;
676     for (j = 0; j < 32; j++)
677     {
678     if ((work & parity[i]) != 0)
679     {
680     psfmt32[idxof (i)] ^= work;
681     return;
682     }
683     work = work << 1;
684     }
685     }
686     }
687    
688     /*----------------
689     PUBLIC FUNCTIONS
690     ----------------*/
691     /**
692     * This function returns the identification string.
693     * The string shows the word size, the Mersenne exponent,
694     * and all parameters of this generator.
695     */
696     char const * const
697     get_idstring (void)
698     {
699     return IDSTR;
700     }
701    
702     /**
703     * This function returns the minimum size of array used for \b
704     * fill_array32() function.
705     * @return minimum size of array used for fill_array32() function.
706     */
707     int
708     get_min_array_size32 (void)
709     {
710     return N32;
711     }
712    
713     /**
714     * This function returns the minimum size of array used for \b
715     * fill_array64() function.
716     * @return minimum size of array used for fill_array64() function.
717     */
718     int
719     get_min_array_size64 (void)
720     {
721     return N64;
722     }
723    
724     #ifndef ONLY64
725     /**
726     * This function generates and returns 32-bit pseudorandom number.
727     * init_gen_rand or init_by_array must be called before this function.
728     * @return 32-bit pseudorandom number
729     */
730     uint32_t
731     gen_rand32 (void)
732     {
733     uint32_t r;
734    
735     assert (initialized);
736     if (idx >= N32)
737     {
738     gen_rand_all ();
739     idx = 0;
740     }
741     r = psfmt32[idx++];
742     return r;
743     }
744     #endif
745     /**
746     * This function generates and returns 64-bit pseudorandom number.
747     * init_gen_rand or init_by_array must be called before this function.
748     * The function gen_rand64 should not be called after gen_rand32,
749     * unless an initialization is again executed.
750     * @return 64-bit pseudorandom number
751     */
752     uint64_t
753     gen_rand64 (void)
754     {
755     #if defined(BIG_ENDIAN64) && !defined(ONLY64)
756     uint32_t r1, r2;
757     #else
758     uint64_t r;
759     #endif
760    
761     assert (initialized);
762     assert (idx % 2 == 0);
763    
764     if (idx >= N32)
765     {
766     gen_rand_all ();
767     idx = 0;
768     }
769     #if defined(BIG_ENDIAN64) && !defined(ONLY64)
770     r1 = psfmt32[idx];
771     r2 = psfmt32[idx + 1];
772     idx += 2;
773     return ((uint64_t) r2 << 32) | r1;
774     #else
775     r = psfmt64[idx / 2];
776     idx += 2;
777     return r;
778     #endif
779     }
780    
781     #ifndef ONLY64
782     /**
783     * This function generates pseudorandom 32-bit integers in the
784     * specified array[] by one call. The number of pseudorandom integers
785     * is specified by the argument size, which must be at least 624 and a
786     * multiple of four. The generation by this function is much faster
787     * than the following gen_rand function.
788     *
789     * For initialization, init_gen_rand or init_by_array must be called
790     * before the first call of this function. This function can not be
791     * used after calling gen_rand function, without initialization.
792     *
793     * @param array an array where pseudorandom 32-bit integers are filled
794     * by this function. The pointer to the array must be \b "aligned"
795     * (namely, must be a multiple of 16) in the SIMD version, since it
796     * refers to the address of a 128-bit integer. In the standard C
797     * version, the pointer is arbitrary.
798     *
799     * @param size the number of 32-bit pseudorandom integers to be
800     * generated. size must be a multiple of 4, and greater than or equal
801     * to (MEXP / 128 + 1) * 4.
802     *
803     * @note \b memalign or \b posix_memalign is available to get aligned
804     * memory. Mac OSX doesn't have these functions, but \b malloc of OSX
805     * returns the pointer to the aligned memory block.
806     */
807     void
808     fill_array32 (uint32_t * array, int size)
809     {
810     assert (initialized);
811     assert (idx == N32);
812     assert (size % 4 == 0);
813     assert (size >= N32);
814    
815     gen_rand_array (reinterpret_cast<w128_t *> (array), size / 4);
816     idx = N32;
817     }
818     #endif
819    
820     /**
821     * This function generates pseudorandom 64-bit integers in the
822     * specified array[] by one call. The number of pseudorandom integers
823     * is specified by the argument size, which must be at least 312 and a
824     * multiple of two. The generation by this function is much faster
825     * than the following gen_rand function.
826     *
827     * For initialization, init_gen_rand or init_by_array must be called
828     * before the first call of this function. This function can not be
829     * used after calling gen_rand function, without initialization.
830     *
831     * @param array an array where pseudorandom 64-bit integers are filled
832     * by this function. The pointer to the array must be "aligned"
833     * (namely, must be a multiple of 16) in the SIMD version, since it
834     * refers to the address of a 128-bit integer. In the standard C
835     * version, the pointer is arbitrary.
836     *
837     * @param size the number of 64-bit pseudorandom integers to be
838     * generated. size must be a multiple of 2, and greater than or equal
839     * to (MEXP / 128 + 1) * 2
840     *
841     * @note \b memalign or \b posix_memalign is available to get aligned
842     * memory. Mac OSX doesn't have these functions, but \b malloc of OSX
843     * returns the pointer to the aligned memory block.
844     */
845     void
846     fill_array64 (uint64_t * array, int size)
847     {
848     assert (initialized);
849     assert (idx == N32);
850     assert (size % 2 == 0);
851     assert (size >= N64);
852    
853     gen_rand_array (reinterpret_cast<w128_t *> (array), size / 2);
854     idx = N32;
855    
856     #if defined(BIG_ENDIAN64) && !defined(ONLY64)
857     swap (reinterpret_cast<w128_t *> (array), size / 2);
858     #endif
859     }
860    
861     /**
862     * This function initializes the internal state array with a 32-bit
863     * integer seed.
864     *
865     * @param seed a 32-bit integer used as the seed.
866     */
867     void
868     init_gen_rand (uint32_t seed)
869     {
870     int i;
871    
872     psfmt32[idxof (0)] = seed;
873     for (i = 1; i < N32; i++)
874     {
875     psfmt32[idxof (i)] = 1812433253UL * (psfmt32[idxof (i - 1)] ^ (psfmt32[idxof (i - 1)] >> 30)) + i;
876     }
877     idx = N32;
878     period_certification ();
879     initialized = 1;
880     }
881    
882     /**
883     * This function initializes the internal state array,
884     * with an array of 32-bit integers used as the seeds
885     * @param init_key the array of 32-bit integers, used as a seed.
886     * @param key_length the length of init_key.
887     */
888     void
889     init_by_array (uint32_t * init_key, int key_length)
890     {
891     int i, j, count;
892     uint32_t r;
893     int lag;
894     int mid;
895     int size = N * 4;
896    
897     if (size >= 623)
898     {
899     lag = 11;
900     }
901     else if (size >= 68)
902     {
903     lag = 7;
904     }
905     else if (size >= 39)
906     {
907     lag = 5;
908     }
909     else
910     {
911     lag = 3;
912     }
913     mid = (size - lag) / 2;
914    
915     memset (sfmt, 0x8b, sizeof (sfmt));
916     if (key_length + 1 > N32)
917     {
918     count = key_length + 1;
919     }
920     else
921     {
922     count = N32;
923     }
924     r = func1 (psfmt32[idxof (0)] ^ psfmt32[idxof (mid)] ^ psfmt32[idxof (N32 - 1)]);
925     psfmt32[idxof (mid)] += r;
926     r += key_length;
927     psfmt32[idxof (mid + lag)] += r;
928     psfmt32[idxof (0)] = r;
929    
930     count--;
931     for (i = 1, j = 0; (j < count) && (j < key_length); j++)
932     {
933     r = func1 (psfmt32[idxof (i)] ^ psfmt32[idxof ((i + mid) % N32)] ^ psfmt32[idxof ((i + N32 - 1) % N32)]);
934     psfmt32[idxof ((i + mid) % N32)] += r;
935     r += init_key[j] + i;
936     psfmt32[idxof ((i + mid + lag) % N32)] += r;
937     psfmt32[idxof (i)] = r;
938     i = (i + 1) % N32;
939     }
940     for (; j < count; j++)
941     {
942     r = func1 (psfmt32[idxof (i)] ^ psfmt32[idxof ((i + mid) % N32)] ^ psfmt32[idxof ((i + N32 - 1) % N32)]);
943     psfmt32[idxof ((i + mid) % N32)] += r;
944     r += i;
945     psfmt32[idxof ((i + mid + lag) % N32)] += r;
946     psfmt32[idxof (i)] = r;
947     i = (i + 1) % N32;
948     }
949     for (j = 0; j < N32; j++)
950     {
951     r = func2 (psfmt32[idxof (i)] + psfmt32[idxof ((i + mid) % N32)] + psfmt32[idxof ((i + N32 - 1) % N32)]);
952     psfmt32[idxof ((i + mid) % N32)] ^= r;
953     r -= i;
954     psfmt32[idxof ((i + mid + lag) % N32)] ^= r;
955     psfmt32[idxof (i)] = r;
956     i = (i + 1) % N32;
957     }
958    
959     idx = N32;
960     period_certification ();
961     initialized = 1;
962     }