ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/ermyth/src/random.C
Revision: 1.1
Committed: Tue Aug 28 17:18:26 2007 UTC (16 years, 9 months ago) by pippijn
Content type: text/plain
Branch: MAIN
Log Message:
added new files

File Contents

# User Rev Content
1 pippijn 1.1 /*
2     * random.C: SIMD oriented Fast Mersenne Twister(SFMT)
3     *
4     * Copyright © 2007 Pippijn van Steenhoven / The Ermyth Team
5     * Copyright © 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima
6     * University. All rights reserved.
7     *
8     * @author Mutsuo Saito (Hiroshima University)
9     * @author Makoto Matsumoto (Hiroshima University)
10     *
11     * The new BSD License is applied to this software, see doc/poddoc/license.pod
12     *
13     * $Id$
14     */
15     #include <string.h>
16     #include <assert.h>
17    
18     #include <common/random.h>
19    
20     /*-----------------
21     BASIC DEFINITIONS
22     -----------------*/
23     /** SFMT generator has an internal state array of 128-bit integers,
24     * and N is its size. */
25     #define N (216091 / 128 + 1)
26     /** N32 is the size of internal state array when regarded as an array
27     * of 32-bit integers.*/
28     #define N32 (N * 4)
29     /** N64 is the size of internal state array when regarded as an array
30     * of 64-bit integers.*/
31     #define N64 (N * 2)
32    
33     /*----------------------
34     the parameters of SFMT
35     following definitions are in paramsXXXX.h file.
36     ----------------------*/
37     // the pick up position of the array.
38     #define POS1 627
39     // the parameter of shift left as four 32-bit registers.
40     #define SL1 11
41     // the parameter of shift left as one 128-bit register.
42     // The 128-bit integer is shifted by (SL2 * 8) bits.
43     #define SL2 3
44     // the parameter of shift right as four 32-bit registers.
45     #define SR1 10
46     // the parameter of shift right as one 128-bit register.
47     // The 128-bit integer is shifted by (SL2 * 8) bits.
48     #define SR2 1
49     // A bitmask, used in the recursion. These parameters are introduced
50     // to break symmetry of SIMD.
51     #define MSK1 0xbff7bff7U
52     #define MSK2 0xbfffffffU
53     #define MSK3 0xbffffa7fU
54     #define MSK4 0xffddfbfbU
55     // These definitions are part of a 128-bit period certification vector.
56     #define PARITY1 0xf8000001U
57     #define PARITY2 0x89e80709U
58     #define PARITY3 0x3bd2b64bU
59     #define PARITY4 0x0c64b1e4U
60    
61     /* PARAMETERS FOR ALTIVEC */
62     #if defined(__APPLE__) /* For OSX */
63     #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1)
64     #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1)
65     #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4)
66     #define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3)
67     #define ALTI_SL2_PERM (vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10)
68     #define ALTI_SL2_PERM64 (vector unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2)
69     #define ALTI_SR2_PERM (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14)
70     #define ALTI_SR2_PERM64 (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14)
71     #else // !__APPLE__
72     #define ALTI_SL1 {SL1, SL1, SL1, SL1}
73     #define ALTI_SR1 {SR1, SR1, SR1, SR1}
74     #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4}
75     #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3}
76     #define ALTI_SL2_PERM { 3,21,21,21, 7, 0, 1, 2,11, 4, 5, 6,15, 8, 9,10}
77     #define ALTI_SL2_PERM64 { 3, 4, 5, 6, 7,29,29,29,11,12,13,14,15, 0, 1, 2}
78     #define ALTI_SR2_PERM { 7, 0, 1, 2,11, 4, 5, 6,15, 8, 9,10,17,12,13,14}
79     #define ALTI_SR2_PERM64 {15, 0, 1, 2, 3, 4, 5, 6,17, 8, 9,10,11,12,13,14}
80     #endif // __APPLE__
81     #define IDSTR "SFMT-216091:627-11-3-10-1:bff7bff7-bfffffff-bffffa7f-ffddfbfb"
82    
83     #if defined(__BIG_ENDIAN__) && !defined(__amd64) && !defined(BIG_ENDIAN64)
84     #define BIG_ENDIAN64 1
85     #endif
86     #if defined(HAVE_ALTIVEC) && !defined(BIG_ENDIAN64)
87     #define BIG_ENDIAN64 1
88     #endif
89     #if defined(ONLY64) && !defined(BIG_ENDIAN64)
90     #if defined(__GNUC__)
91     #error "-DONLY64 must be specified with -DBIG_ENDIAN64"
92     #endif
93     #undef ONLY64
94     #endif
95    
96     /*--------------------------------------
97     FILE GLOBAL VARIABLES
98     internal state, index counter and flag
99     --------------------------------------*/
100     /** the 128-bit internal state array */
101     static w128_t sfmt[N];
102     /** the 32bit integer pointer to the 128-bit internal state array */
103     static uint32_t *psfmt32 = &sfmt[0].u[0];
104     #if !defined(BIG_ENDIAN64) || defined(ONLY64)
105     /** the 64bit integer pointer to the 128-bit internal state array */
106     static uint64_t *psfmt64 = reinterpret_cast<uint64_t *> (&sfmt[0].u[0]);
107     #endif
108     /** index counter to the 32-bit internal state array */
109     static int idx;
110     /** a flag: it is 0 if and only if the internal state is not yet
111     * initialized. */
112     static int initialized = 0;
113     /** a parity check vector which certificate the period of 2^{MEXP} */
114     static uint32_t parity[4] = { PARITY1, PARITY2, PARITY3, PARITY4 };
115    
116     /*----------------
117     STATIC FUNCTIONS
118     ----------------*/
119     inline static int idxof (int i);
120     inline static void rshift128 (w128_t *out, w128_t const *in, int shift);
121     inline static void lshift128 (w128_t *out, w128_t const *in, int shift);
122     inline static void gen_rand_all (void);
123     inline static void gen_rand_array (w128_t *array, int size);
124     inline static uint32_t func1 (uint32_t x);
125     inline static uint32_t func2 (uint32_t x);
126     inline static void period_certification (void);
127     #if defined(BIG_ENDIAN64) && !defined(ONLY64)
128     inline static void swap (w128_t *array, int size);
129     #endif
130    
131     #if defined(HAVE_ALTIVEC)
132     inline static vector unsigned int
133     vec_recursion (vector unsigned int a, vector unsigned int b, vector unsigned int c, vector unsigned int d) ALWAYSINLINE;
134    
135     /**
136     * This function represents the recursion formula in AltiVec and BIG ENDIAN.
137     * @param a a 128-bit part of the interal state array
138     * @param b a 128-bit part of the interal state array
139     * @param c a 128-bit part of the interal state array
140     * @param d a 128-bit part of the interal state array
141     * @return output
142     */
143     inline static vector unsigned int vec_recursion (vector unsigned int a, vector unsigned int b, vector unsigned int c, vector unsigned int d)
144     {
145     const vector unsigned int sl1 = ALTI_SL1;
146     const vector unsigned int sr1 = ALTI_SR1;
147     #ifdef ONLY64
148     const vector unsigned int mask = ALTI_MSK64;
149     const vector unsigned char perm_sl = ALTI_SL2_PERM64;
150     const vector unsigned char perm_sr = ALTI_SR2_PERM64;
151     #else
152     const vector unsigned int mask = ALTI_MSK;
153     const vector unsigned char perm_sl = ALTI_SL2_PERM;
154     const vector unsigned char perm_sr = ALTI_SR2_PERM;
155     #endif
156     vector unsigned int v, w, x, y, z;
157     x = vec_perm (a, (vector unsigned int) perm_sl, perm_sl);
158     v = a;
159     y = vec_sr (b, sr1);
160     z = vec_perm (c, (vector unsigned int) perm_sr, perm_sr);
161     w = vec_sl (d, sl1);
162     z = vec_xor (z, w);
163     y = vec_and (y, mask);
164     v = vec_xor (v, x);
165     z = vec_xor (z, y);
166     z = vec_xor (z, v);
167     return z;
168     }
169    
170     /**
171     * This function fills the internal state array with pseudorandom
172     * integers.
173     */
174     inline static void
175     gen_rand_all (void)
176     {
177     int i;
178     vector unsigned int r, r1, r2;
179    
180     r1 = sfmt[N - 2].s;
181     r2 = sfmt[N - 1].s;
182     for (i = 0; i < N - POS1; i++)
183     {
184     r = vec_recursion (sfmt[i].s, sfmt[i + POS1].s, r1, r2);
185     sfmt[i].s = r;
186     r1 = r2;
187     r2 = r;
188     }
189     for (; i < N; i++)
190     {
191     r = vec_recursion (sfmt[i].s, sfmt[i + POS1 - N].s, r1, r2);
192     sfmt[i].s = r;
193     r1 = r2;
194     r2 = r;
195     }
196     }
197    
198     /**
199     * This function fills the user-specified array with pseudorandom
200     * integers.
201     *
202     * @param array an 128-bit array to be filled by pseudorandom numbers.
203     * @param size number of 128-bit pesudorandom numbers to be generated.
204     */
205     inline static void
206     gen_rand_array (w128_t *array, int size)
207     {
208     int i, j;
209     vector unsigned int r, r1, r2;
210    
211     r1 = sfmt[N - 2].s;
212     r2 = sfmt[N - 1].s;
213     for (i = 0; i < N - POS1; i++)
214     {
215     r = vec_recursion (sfmt[i].s, sfmt[i + POS1].s, r1, r2);
216     array[i].s = r;
217     r1 = r2;
218     r2 = r;
219     }
220     for (; i < N; i++)
221     {
222     r = vec_recursion (sfmt[i].s, array[i + POS1 - N].s, r1, r2);
223     array[i].s = r;
224     r1 = r2;
225     r2 = r;
226     }
227     /* main loop */
228     for (; i < size - N; i++)
229     {
230     r = vec_recursion (array[i - N].s, array[i + POS1 - N].s, r1, r2);
231     array[i].s = r;
232     r1 = r2;
233     r2 = r;
234     }
235     for (j = 0; j < 2 * N - size; j++)
236     {
237     sfmt[j].s = array[j + size - N].s;
238     }
239     for (; i < size; i++)
240     {
241     r = vec_recursion (array[i - N].s, array[i + POS1 - N].s, r1, r2);
242     array[i].s = r;
243     sfmt[j++].s = r;
244     r1 = r2;
245     r2 = r;
246     }
247     }
248    
249     #ifndef ONLY64
250     #if defined(__APPLE__)
251     #define ALTI_SWAP (vector unsigned char) \
252     (4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11)
253     #else
254     #define ALTI_SWAP {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11}
255     #endif
256     /**
257     * This function swaps high and low 32-bit of 64-bit integers in user
258     * specified array.
259     *
260     * @param array an 128-bit array to be swaped.
261     * @param size size of 128-bit array.
262     */
263     inline static void
264     swap (w128_t *array, int size)
265     {
266     int i;
267     const vector unsigned char perm = ALTI_SWAP;
268    
269     for (i = 0; i < size; i++)
270     {
271     array[i].s = vec_perm (array[i].s, (vector unsigned int) perm, perm);
272     }
273     }
274     #endif
275     #elif defined(HAVE_SSE2)
276     inline static __m128i
277     mm_recursion (__m128i *a, __m128i *b, __m128i c, __m128i d, __m128i mask);
278    
279     /**
280     * This function represents the recursion formula.
281     * @param a a 128-bit part of the interal state array
282     * @param b a 128-bit part of the interal state array
283     * @param c a 128-bit part of the interal state array
284     * @param d a 128-bit part of the interal state array
285     * @param mask 128-bit mask
286     * @return output
287     */
288     inline static __m128i mm_recursion (__m128i *a, __m128i *b, __m128i c, __m128i d, __m128i mask)
289     {
290     __m128i v, x, y, z;
291    
292     x = _mm_load_si128 (a);
293     y = _mm_srli_epi32 (*b, SR1);
294     z = _mm_srli_si128 (c, SR2);
295     v = _mm_slli_epi32 (d, SL1);
296     z = _mm_xor_si128 (z, x);
297     z = _mm_xor_si128 (z, v);
298     x = _mm_slli_si128 (x, SL2);
299     y = _mm_and_si128 (y, mask);
300     z = _mm_xor_si128 (z, x);
301     z = _mm_xor_si128 (z, y);
302     return z;
303     }
304    
305     /**
306     * This function fills the internal state array with pseudorandom
307     * integers.
308     */
309     inline static void
310     gen_rand_all (void)
311     {
312     int i;
313     __m128i r, r1, r2, mask;
314     mask = _mm_set_epi32 (MSK4, MSK3, MSK2, MSK1);
315    
316     r1 = _mm_load_si128 (&sfmt[N - 2].si);
317     r2 = _mm_load_si128 (&sfmt[N - 1].si);
318     for (i = 0; i < N - POS1; i++)
319     {
320     r = mm_recursion (&sfmt[i].si, &sfmt[i + POS1].si, r1, r2, mask);
321     _mm_store_si128 (&sfmt[i].si, r);
322     r1 = r2;
323     r2 = r;
324     }
325     for (; i < N; i++)
326     {
327     r = mm_recursion (&sfmt[i].si, &sfmt[i + POS1 - N].si, r1, r2, mask);
328     _mm_store_si128 (&sfmt[i].si, r);
329     r1 = r2;
330     r2 = r;
331     }
332     }
333    
334     /**
335     * This function fills the user-specified array with pseudorandom
336     * integers.
337     *
338     * @param array an 128-bit array to be filled by pseudorandom numbers.
339     * @param size number of 128-bit pesudorandom numbers to be generated.
340     */
341     inline static void
342     gen_rand_array (w128_t *array, int size)
343     {
344     int i, j;
345     __m128i r, r1, r2, mask;
346     mask = _mm_set_epi32 (MSK4, MSK3, MSK2, MSK1);
347    
348     r1 = _mm_load_si128 (&sfmt[N - 2].si);
349     r2 = _mm_load_si128 (&sfmt[N - 1].si);
350     for (i = 0; i < N - POS1; i++)
351     {
352     r = mm_recursion (&sfmt[i].si, &sfmt[i + POS1].si, r1, r2, mask);
353     _mm_store_si128 (&array[i].si, r);
354     r1 = r2;
355     r2 = r;
356     }
357     for (; i < N; i++)
358     {
359     r = mm_recursion (&sfmt[i].si, &array[i + POS1 - N].si, r1, r2, mask);
360     _mm_store_si128 (&array[i].si, r);
361     r1 = r2;
362     r2 = r;
363     }
364     /* main loop */
365     for (; i < size - N; i++)
366     {
367     r = mm_recursion (&array[i - N].si, &array[i + POS1 - N].si, r1, r2, mask);
368     _mm_store_si128 (&array[i].si, r);
369     r1 = r2;
370     r2 = r;
371     }
372     for (j = 0; j < 2 * N - size; j++)
373     {
374     r = _mm_load_si128 (&array[j + size - N].si);
375     _mm_store_si128 (&sfmt[j].si, r);
376     }
377     for (; i < size; i++)
378     {
379     r = mm_recursion (&array[i - N].si, &array[i + POS1 - N].si, r1, r2, mask);
380     _mm_store_si128 (&array[i].si, r);
381     _mm_store_si128 (&sfmt[j++].si, r);
382     r1 = r2;
383     r2 = r;
384     }
385     }
386     #endif
387    
388     /**
389     * This function simulate a 64-bit index of LITTLE ENDIAN
390     * in BIG ENDIAN machine.
391     */
392     #ifdef ONLY64
393     inline static int
394     idxof (int i)
395     {
396     return i ^ 1;
397     }
398     #else
399     inline static int
400     idxof (int i)
401     {
402     return i;
403     }
404     #endif
405     /**
406     * This function simulates SIMD 128-bit right shift by the standard C.
407     * The 128-bit integer given in in is shifted by (shift * 8) bits.
408     * This function simulates the LITTLE ENDIAN SIMD.
409     * @param out the output of this function
410     * @param in the 128-bit data to be shifted
411     * @param shift the shift value
412     */
413     #ifdef ONLY64
414     inline static void
415     rshift128 (w128_t *out, w128_t const *in, int shift)
416     {
417     uint64_t th, tl, oh, ol;
418    
419     th = ((uint64_t) in->u[2] << 32) | ((uint64_t) in->u[3]);
420     tl = ((uint64_t) in->u[0] << 32) | ((uint64_t) in->u[1]);
421    
422     oh = th >> (shift * 8);
423     ol = tl >> (shift * 8);
424     ol |= th << (64 - shift * 8);
425     out->u[0] = (uint32_t) (ol >> 32);
426     out->u[1] = (uint32_t) ol;
427     out->u[2] = (uint32_t) (oh >> 32);
428     out->u[3] = (uint32_t) oh;
429     }
430     #else
431     inline static void
432     rshift128 (w128_t *out, w128_t const *in, int shift)
433     {
434     uint64_t th, tl, oh, ol;
435    
436     th = ((uint64_t) in->u[3] << 32) | ((uint64_t) in->u[2]);
437     tl = ((uint64_t) in->u[1] << 32) | ((uint64_t) in->u[0]);
438    
439     oh = th >> (shift * 8);
440     ol = tl >> (shift * 8);
441     ol |= th << (64 - shift * 8);
442     out->u[1] = (uint32_t) (ol >> 32);
443     out->u[0] = (uint32_t) ol;
444     out->u[3] = (uint32_t) (oh >> 32);
445     out->u[2] = (uint32_t) oh;
446     }
447     #endif
448     /**
449     * This function simulates SIMD 128-bit left shift by the standard C.
450     * The 128-bit integer given in in is shifted by (shift * 8) bits.
451     * This function simulates the LITTLE ENDIAN SIMD.
452     * @param out the output of this function
453     * @param in the 128-bit data to be shifted
454     * @param shift the shift value
455     */
456     #ifdef ONLY64
457     inline static void
458     lshift128 (w128_t *out, w128_t const *in, int shift)
459     {
460     uint64_t th, tl, oh, ol;
461    
462     th = ((uint64_t) in->u[2] << 32) | ((uint64_t) in->u[3]);
463     tl = ((uint64_t) in->u[0] << 32) | ((uint64_t) in->u[1]);
464    
465     oh = th << (shift * 8);
466     ol = tl << (shift * 8);
467     oh |= tl >> (64 - shift * 8);
468     out->u[0] = (uint32_t) (ol >> 32);
469     out->u[1] = (uint32_t) ol;
470     out->u[2] = (uint32_t) (oh >> 32);
471     out->u[3] = (uint32_t) oh;
472     }
473     #else
474     inline static void
475     lshift128 (w128_t *out, w128_t const *in, int shift)
476     {
477     uint64_t th, tl, oh, ol;
478    
479     th = ((uint64_t) in->u[3] << 32) | ((uint64_t) in->u[2]);
480     tl = ((uint64_t) in->u[1] << 32) | ((uint64_t) in->u[0]);
481    
482     oh = th << (shift * 8);
483     ol = tl << (shift * 8);
484     oh |= tl >> (64 - shift * 8);
485     out->u[1] = (uint32_t) (ol >> 32);
486     out->u[0] = (uint32_t) ol;
487     out->u[3] = (uint32_t) (oh >> 32);
488     out->u[2] = (uint32_t) oh;
489     }
490     #endif
491    
492     /**
493     * This function represents the recursion formula.
494     * @param r output
495     * @param a a 128-bit part of the internal state array
496     * @param b a 128-bit part of the internal state array
497     * @param c a 128-bit part of the internal state array
498     * @param d a 128-bit part of the internal state array
499     */
500     #ifdef ONLY64
501     inline static void
502     do_recursion (w128_t *r, w128_t *a, w128_t *b, w128_t *c, w128_t *d)
503     {
504     w128_t x;
505     w128_t y;
506    
507     lshift128 (&x, a, SL2);
508     rshift128 (&y, c, SR2);
509     r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK2) ^ y.u[0] ^ (d->u[0] << SL1);
510     r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK1) ^ y.u[1] ^ (d->u[1] << SL1);
511     r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK4) ^ y.u[2] ^ (d->u[2] << SL1);
512     r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK3) ^ y.u[3] ^ (d->u[3] << SL1);
513     }
514     #else
515     inline static void
516     do_recursion (w128_t *r, w128_t *a, w128_t *b, w128_t *c, w128_t *d)
517     {
518     w128_t x;
519     w128_t y;
520    
521     lshift128 (&x, a, SL2);
522     rshift128 (&y, c, SR2);
523     r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK1) ^ y.u[0] ^ (d->u[0] << SL1);
524     r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK2) ^ y.u[1] ^ (d->u[1] << SL1);
525     r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK3) ^ y.u[2] ^ (d->u[2] << SL1);
526     r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK4) ^ y.u[3] ^ (d->u[3] << SL1);
527     }
528     #endif
529    
530     #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2))
531     /**
532     * This function fills the internal state array with pseudorandom
533     * integers.
534     */
535     inline static void
536     gen_rand_all (void)
537     {
538     int i;
539     w128_t *r1, *r2;
540    
541     r1 = &sfmt[N - 2];
542     r2 = &sfmt[N - 1];
543     for (i = 0; i < N - POS1; i++)
544     {
545     do_recursion (&sfmt[i], &sfmt[i], &sfmt[i + POS1], r1, r2);
546     r1 = r2;
547     r2 = &sfmt[i];
548     }
549     for (; i < N; i++)
550     {
551     do_recursion (&sfmt[i], &sfmt[i], &sfmt[i + POS1 - N], r1, r2);
552     r1 = r2;
553     r2 = &sfmt[i];
554     }
555     }
556    
557     /**
558     * This function fills the user-specified array with pseudorandom
559     * integers.
560     *
561     * @param array an 128-bit array to be filled by pseudorandom numbers.
562     * @param size number of 128-bit pseudorandom numbers to be generated.
563     */
564     inline static void
565     gen_rand_array (w128_t *array, int size)
566     {
567     int i, j;
568     w128_t *r1, *r2;
569    
570     r1 = &sfmt[N - 2];
571     r2 = &sfmt[N - 1];
572     for (i = 0; i < N - POS1; i++)
573     {
574     do_recursion (&array[i], &sfmt[i], &sfmt[i + POS1], r1, r2);
575     r1 = r2;
576     r2 = &array[i];
577     }
578     for (; i < N; i++)
579     {
580     do_recursion (&array[i], &sfmt[i], &array[i + POS1 - N], r1, r2);
581     r1 = r2;
582     r2 = &array[i];
583     }
584     for (; i < size - N; i++)
585     {
586     do_recursion (&array[i], &array[i - N], &array[i + POS1 - N], r1, r2);
587     r1 = r2;
588     r2 = &array[i];
589     }
590     for (j = 0; j < 2 * N - size; j++)
591     {
592     sfmt[j] = array[j + size - N];
593     }
594     for (; i < size; i++, j++)
595     {
596     do_recursion (&array[i], &array[i - N], &array[i + POS1 - N], r1, r2);
597     r1 = r2;
598     r2 = &array[i];
599     sfmt[j] = array[i];
600     }
601     }
602     #endif
603    
604     #if defined(BIG_ENDIAN64) && !defined(ONLY64) && !defined(HAVE_ALTIVEC)
605     inline static void
606     swap (w128_t *array, int size)
607     {
608     int i;
609     uint32_t x, y;
610    
611     for (i = 0; i < size; i++)
612     {
613     x = array[i].u[0];
614     y = array[i].u[2];
615     array[i].u[0] = array[i].u[1];
616     array[i].u[2] = array[i].u[3];
617     array[i].u[1] = x;
618     array[i].u[3] = y;
619     }
620     }
621     #endif
622     /**
623     * This function represents a function used in the initialization
624     * by init_by_array
625     * @param x 32-bit integer
626     * @return 32-bit integer
627     */
628     inline static uint32_t
629     func1 (uint32_t x)
630     {
631     return (x ^ (x >> 27)) * (uint32_t) 1664525UL;
632     }
633    
634     /**
635     * This function represents a function used in the initialization
636     * by init_by_array
637     * @param x 32-bit integer
638     * @return 32-bit integer
639     */
640     inline static uint32_t
641     func2 (uint32_t x)
642     {
643     return (x ^ (x >> 27)) * (uint32_t) 1566083941UL;
644     }
645    
646     /**
647     * This function certificate the period of 2^{MEXP}
648     */
649     static void
650     period_certification (void)
651     {
652     int inner = 0;
653     int i, j;
654     uint32_t work;
655    
656     for (i = 0; i < 4; i++)
657     inner ^= psfmt32[idxof (i)] & parity[i];
658     for (i = 16; i > 0; i >>= 1)
659     inner ^= inner >> i;
660     inner &= 1;
661     /* check OK */
662     if (inner == 1)
663     {
664     return;
665     }
666     /* check NG, and modification */
667     for (i = 0; i < 4; i++)
668     {
669     work = 1;
670     for (j = 0; j < 32; j++)
671     {
672     if ((work & parity[i]) != 0)
673     {
674     psfmt32[idxof (i)] ^= work;
675     return;
676     }
677     work = work << 1;
678     }
679     }
680     }
681    
682     /*----------------
683     PUBLIC FUNCTIONS
684     ----------------*/
685     /**
686     * This function returns the identification string.
687     * The string shows the word size, the Mersenne exponent,
688     * and all parameters of this generator.
689     */
690     char const * const
691     get_idstring (void)
692     {
693     return IDSTR;
694     }
695    
696     /**
697     * This function returns the minimum size of array used for \b
698     * fill_array32() function.
699     * @return minimum size of array used for fill_array32() function.
700     */
701     int
702     get_min_array_size32 (void)
703     {
704     return N32;
705     }
706    
707     /**
708     * This function returns the minimum size of array used for \b
709     * fill_array64() function.
710     * @return minimum size of array used for fill_array64() function.
711     */
712     int
713     get_min_array_size64 (void)
714     {
715     return N64;
716     }
717    
718     #ifndef ONLY64
719     /**
720     * This function generates and returns 32-bit pseudorandom number.
721     * init_gen_rand or init_by_array must be called before this function.
722     * @return 32-bit pseudorandom number
723     */
724     uint32_t
725     gen_rand32 (void)
726     {
727     uint32_t r;
728    
729     assert (initialized);
730     if (idx >= N32)
731     {
732     gen_rand_all ();
733     idx = 0;
734     }
735     r = psfmt32[idx++];
736     return r;
737     }
738     #endif
739     /**
740     * This function generates and returns 64-bit pseudorandom number.
741     * init_gen_rand or init_by_array must be called before this function.
742     * The function gen_rand64 should not be called after gen_rand32,
743     * unless an initialization is again executed.
744     * @return 64-bit pseudorandom number
745     */
746     uint64_t
747     gen_rand64 (void)
748     {
749     #if defined(BIG_ENDIAN64) && !defined(ONLY64)
750     uint32_t r1, r2;
751     #else
752     uint64_t r;
753     #endif
754    
755     assert (initialized);
756     assert (idx % 2 == 0);
757    
758     if (idx >= N32)
759     {
760     gen_rand_all ();
761     idx = 0;
762     }
763     #if defined(BIG_ENDIAN64) && !defined(ONLY64)
764     r1 = psfmt32[idx];
765     r2 = psfmt32[idx + 1];
766     idx += 2;
767     return ((uint64_t) r2 << 32) | r1;
768     #else
769     r = psfmt64[idx / 2];
770     idx += 2;
771     return r;
772     #endif
773     }
774    
775     #ifndef ONLY64
776     /**
777     * This function generates pseudorandom 32-bit integers in the
778     * specified array[] by one call. The number of pseudorandom integers
779     * is specified by the argument size, which must be at least 624 and a
780     * multiple of four. The generation by this function is much faster
781     * than the following gen_rand function.
782     *
783     * For initialization, init_gen_rand or init_by_array must be called
784     * before the first call of this function. This function can not be
785     * used after calling gen_rand function, without initialization.
786     *
787     * @param array an array where pseudorandom 32-bit integers are filled
788     * by this function. The pointer to the array must be \b "aligned"
789     * (namely, must be a multiple of 16) in the SIMD version, since it
790     * refers to the address of a 128-bit integer. In the standard C
791     * version, the pointer is arbitrary.
792     *
793     * @param size the number of 32-bit pseudorandom integers to be
794     * generated. size must be a multiple of 4, and greater than or equal
795     * to (MEXP / 128 + 1) * 4.
796     *
797     * @note \b memalign or \b posix_memalign is available to get aligned
798     * memory. Mac OSX doesn't have these functions, but \b malloc of OSX
799     * returns the pointer to the aligned memory block.
800     */
801     void
802     fill_array32 (uint32_t * array, int size)
803     {
804     assert (initialized);
805     assert (idx == N32);
806     assert (size % 4 == 0);
807     assert (size >= N32);
808    
809     gen_rand_array (reinterpret_cast<w128_t *> (array), size / 4);
810     idx = N32;
811     }
812     #endif
813    
814     /**
815     * This function generates pseudorandom 64-bit integers in the
816     * specified array[] by one call. The number of pseudorandom integers
817     * is specified by the argument size, which must be at least 312 and a
818     * multiple of two. The generation by this function is much faster
819     * than the following gen_rand function.
820     *
821     * For initialization, init_gen_rand or init_by_array must be called
822     * before the first call of this function. This function can not be
823     * used after calling gen_rand function, without initialization.
824     *
825     * @param array an array where pseudorandom 64-bit integers are filled
826     * by this function. The pointer to the array must be "aligned"
827     * (namely, must be a multiple of 16) in the SIMD version, since it
828     * refers to the address of a 128-bit integer. In the standard C
829     * version, the pointer is arbitrary.
830     *
831     * @param size the number of 64-bit pseudorandom integers to be
832     * generated. size must be a multiple of 2, and greater than or equal
833     * to (MEXP / 128 + 1) * 2
834     *
835     * @note \b memalign or \b posix_memalign is available to get aligned
836     * memory. Mac OSX doesn't have these functions, but \b malloc of OSX
837     * returns the pointer to the aligned memory block.
838     */
839     void
840     fill_array64 (uint64_t * array, int size)
841     {
842     assert (initialized);
843     assert (idx == N32);
844     assert (size % 2 == 0);
845     assert (size >= N64);
846    
847     gen_rand_array (reinterpret_cast<w128_t *> (array), size / 2);
848     idx = N32;
849    
850     #if defined(BIG_ENDIAN64) && !defined(ONLY64)
851     swap (reinterpret_cast<w128_t *> (array), size / 2);
852     #endif
853     }
854    
855     /**
856     * This function initializes the internal state array with a 32-bit
857     * integer seed.
858     *
859     * @param seed a 32-bit integer used as the seed.
860     */
861     void
862     init_gen_rand (uint32_t seed)
863     {
864     int i;
865    
866     psfmt32[idxof (0)] = seed;
867     for (i = 1; i < N32; i++)
868     {
869     psfmt32[idxof (i)] = 1812433253UL * (psfmt32[idxof (i - 1)] ^ (psfmt32[idxof (i - 1)] >> 30)) + i;
870     }
871     idx = N32;
872     period_certification ();
873     initialized = 1;
874     }
875    
876     /**
877     * This function initializes the internal state array,
878     * with an array of 32-bit integers used as the seeds
879     * @param init_key the array of 32-bit integers, used as a seed.
880     * @param key_length the length of init_key.
881     */
882     void
883     init_by_array (uint32_t * init_key, int key_length)
884     {
885     int i, j, count;
886     uint32_t r;
887     int lag;
888     int mid;
889     int size = N * 4;
890    
891     if (size >= 623)
892     {
893     lag = 11;
894     }
895     else if (size >= 68)
896     {
897     lag = 7;
898     }
899     else if (size >= 39)
900     {
901     lag = 5;
902     }
903     else
904     {
905     lag = 3;
906     }
907     mid = (size - lag) / 2;
908    
909     memset (sfmt, 0x8b, sizeof (sfmt));
910     if (key_length + 1 > N32)
911     {
912     count = key_length + 1;
913     }
914     else
915     {
916     count = N32;
917     }
918     r = func1 (psfmt32[idxof (0)] ^ psfmt32[idxof (mid)] ^ psfmt32[idxof (N32 - 1)]);
919     psfmt32[idxof (mid)] += r;
920     r += key_length;
921     psfmt32[idxof (mid + lag)] += r;
922     psfmt32[idxof (0)] = r;
923    
924     count--;
925     for (i = 1, j = 0; (j < count) && (j < key_length); j++)
926     {
927     r = func1 (psfmt32[idxof (i)] ^ psfmt32[idxof ((i + mid) % N32)] ^ psfmt32[idxof ((i + N32 - 1) % N32)]);
928     psfmt32[idxof ((i + mid) % N32)] += r;
929     r += init_key[j] + i;
930     psfmt32[idxof ((i + mid + lag) % N32)] += r;
931     psfmt32[idxof (i)] = r;
932     i = (i + 1) % N32;
933     }
934     for (; j < count; j++)
935     {
936     r = func1 (psfmt32[idxof (i)] ^ psfmt32[idxof ((i + mid) % N32)] ^ psfmt32[idxof ((i + N32 - 1) % N32)]);
937     psfmt32[idxof ((i + mid) % N32)] += r;
938     r += i;
939     psfmt32[idxof ((i + mid + lag) % N32)] += r;
940     psfmt32[idxof (i)] = r;
941     i = (i + 1) % N32;
942     }
943     for (j = 0; j < N32; j++)
944     {
945     r = func2 (psfmt32[idxof (i)] + psfmt32[idxof ((i + mid) % N32)] + psfmt32[idxof ((i + N32 - 1) % N32)]);
946     psfmt32[idxof ((i + mid) % N32)] ^= r;
947     r -= i;
948     psfmt32[idxof ((i + mid + lag) % N32)] ^= r;
949     psfmt32[idxof (i)] = r;
950     i = (i + 1) % N32;
951     }
952    
953     idx = N32;
954     period_certification ();
955     initialized = 1;
956     }