1 |
/** |
2 |
* random.C: SIMD oriented Fast Mersenne Twister(SFMT) |
3 |
* |
4 |
* Copyright © 2007 Pippijn van Steenhoven / The Ermyth Team |
5 |
* Rights to this code are as documented in COPYING. |
6 |
* |
7 |
* |
8 |
* Portions of this file were derived from sources bearing the following license: |
9 |
* |
10 |
* Copyright © 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima |
11 |
* University. All rights reserved. |
12 |
* |
13 |
* @author Mutsuo Saito (Hiroshima University) |
14 |
* @author Makoto Matsumoto (Hiroshima University) |
15 |
* |
16 |
* The new BSD License is applied to this software. |
17 |
*/ |
18 |
|
19 |
static char const rcsid[] = "$Id: random.C,v 1.3 2007-09-16 18:54:45 pippijn Exp $"; |
20 |
|
21 |
#include <string.h> |
22 |
#include <assert.h> |
23 |
|
24 |
#include <common/random.h> |
25 |
|
26 |
/*----------------- |
27 |
BASIC DEFINITIONS |
28 |
-----------------*/ |
29 |
/** SFMT generator has an internal state array of 128-bit integers, |
30 |
* and N is its size. */ |
31 |
#define N (216091 / 128 + 1) |
32 |
/** N32 is the size of internal state array when regarded as an array |
33 |
* of 32-bit integers.*/ |
34 |
#define N32 (N * 4) |
35 |
/** N64 is the size of internal state array when regarded as an array |
36 |
* of 64-bit integers.*/ |
37 |
#define N64 (N * 2) |
38 |
|
39 |
/*---------------------- |
40 |
the parameters of SFMT |
41 |
following definitions are in paramsXXXX.h file. |
42 |
----------------------*/ |
43 |
// the pick up position of the array. |
44 |
#define POS1 627 |
45 |
// the parameter of shift left as four 32-bit registers. |
46 |
#define SL1 11 |
47 |
// the parameter of shift left as one 128-bit register. |
48 |
// The 128-bit integer is shifted by (SL2 * 8) bits. |
49 |
#define SL2 3 |
50 |
// the parameter of shift right as four 32-bit registers. |
51 |
#define SR1 10 |
52 |
// the parameter of shift right as one 128-bit register. |
53 |
// The 128-bit integer is shifted by (SL2 * 8) bits. |
54 |
#define SR2 1 |
55 |
// A bitmask, used in the recursion. These parameters are introduced |
56 |
// to break symmetry of SIMD. |
57 |
#define MSK1 0xbff7bff7U |
58 |
#define MSK2 0xbfffffffU |
59 |
#define MSK3 0xbffffa7fU |
60 |
#define MSK4 0xffddfbfbU |
61 |
// These definitions are part of a 128-bit period certification vector. |
62 |
#define PARITY1 0xf8000001U |
63 |
#define PARITY2 0x89e80709U |
64 |
#define PARITY3 0x3bd2b64bU |
65 |
#define PARITY4 0x0c64b1e4U |
66 |
|
67 |
/* PARAMETERS FOR ALTIVEC */ |
68 |
#if defined(__APPLE__) /* For OSX */ |
69 |
#define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) |
70 |
#define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) |
71 |
#define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) |
72 |
#define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) |
73 |
#define ALTI_SL2_PERM (vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10) |
74 |
#define ALTI_SL2_PERM64 (vector unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2) |
75 |
#define ALTI_SR2_PERM (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) |
76 |
#define ALTI_SR2_PERM64 (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) |
77 |
#else // !__APPLE__ |
78 |
#define ALTI_SL1 {SL1, SL1, SL1, SL1} |
79 |
#define ALTI_SR1 {SR1, SR1, SR1, SR1} |
80 |
#define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} |
81 |
#define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} |
82 |
#define ALTI_SL2_PERM { 3,21,21,21, 7, 0, 1, 2,11, 4, 5, 6,15, 8, 9,10} |
83 |
#define ALTI_SL2_PERM64 { 3, 4, 5, 6, 7,29,29,29,11,12,13,14,15, 0, 1, 2} |
84 |
#define ALTI_SR2_PERM { 7, 0, 1, 2,11, 4, 5, 6,15, 8, 9,10,17,12,13,14} |
85 |
#define ALTI_SR2_PERM64 {15, 0, 1, 2, 3, 4, 5, 6,17, 8, 9,10,11,12,13,14} |
86 |
#endif // __APPLE__ |
87 |
#define IDSTR "SFMT-216091:627-11-3-10-1:bff7bff7-bfffffff-bffffa7f-ffddfbfb" |
88 |
|
89 |
#if defined(__BIG_ENDIAN__) && !defined(__amd64) && !defined(BIG_ENDIAN64) |
90 |
#define BIG_ENDIAN64 1 |
91 |
#endif |
92 |
#if defined(HAVE_ALTIVEC) && !defined(BIG_ENDIAN64) |
93 |
#define BIG_ENDIAN64 1 |
94 |
#endif |
95 |
#if defined(ONLY64) && !defined(BIG_ENDIAN64) |
96 |
#if defined(__GNUC__) |
97 |
#error "-DONLY64 must be specified with -DBIG_ENDIAN64" |
98 |
#endif |
99 |
#undef ONLY64 |
100 |
#endif |
101 |
|
102 |
/*-------------------------------------- |
103 |
FILE GLOBAL VARIABLES |
104 |
internal state, index counter and flag |
105 |
--------------------------------------*/ |
106 |
/** the 128-bit internal state array */ |
107 |
static w128_t sfmt[N]; |
108 |
/** the 32bit integer pointer to the 128-bit internal state array */ |
109 |
static uint32_t *psfmt32 = &sfmt[0].u[0]; |
110 |
#if !defined(BIG_ENDIAN64) || defined(ONLY64) |
111 |
/** the 64bit integer pointer to the 128-bit internal state array */ |
112 |
static uint64_t *psfmt64 = reinterpret_cast<uint64_t *> (&sfmt[0].u[0]); |
113 |
#endif |
114 |
/** index counter to the 32-bit internal state array */ |
115 |
static int idx; |
116 |
/** a flag: it is 0 if and only if the internal state is not yet |
117 |
* initialized. */ |
118 |
static int initialized = 0; |
119 |
/** a parity check vector which certificate the period of 2^{MEXP} */ |
120 |
static uint32_t parity[4] = { PARITY1, PARITY2, PARITY3, PARITY4 }; |
121 |
|
122 |
/*---------------- |
123 |
STATIC FUNCTIONS |
124 |
----------------*/ |
125 |
inline static int idxof (int i); |
126 |
inline static void rshift128 (w128_t *out, w128_t const *in, int shift); |
127 |
inline static void lshift128 (w128_t *out, w128_t const *in, int shift); |
128 |
inline static void gen_rand_all (void); |
129 |
inline static void gen_rand_array (w128_t *array, int size); |
130 |
inline static uint32_t func1 (uint32_t x); |
131 |
inline static uint32_t func2 (uint32_t x); |
132 |
inline static void period_certification (void); |
133 |
#if defined(BIG_ENDIAN64) && !defined(ONLY64) |
134 |
inline static void swap (w128_t *array, int size); |
135 |
#endif |
136 |
|
137 |
#if defined(HAVE_ALTIVEC) |
138 |
inline static vector unsigned int |
139 |
vec_recursion (vector unsigned int a, vector unsigned int b, vector unsigned int c, vector unsigned int d) ALWAYSINLINE; |
140 |
|
141 |
/** |
142 |
* This function represents the recursion formula in AltiVec and BIG ENDIAN. |
143 |
* @param a a 128-bit part of the interal state array |
144 |
* @param b a 128-bit part of the interal state array |
145 |
* @param c a 128-bit part of the interal state array |
146 |
* @param d a 128-bit part of the interal state array |
147 |
* @return output |
148 |
*/ |
149 |
inline static vector unsigned int vec_recursion (vector unsigned int a, vector unsigned int b, vector unsigned int c, vector unsigned int d) |
150 |
{ |
151 |
const vector unsigned int sl1 = ALTI_SL1; |
152 |
const vector unsigned int sr1 = ALTI_SR1; |
153 |
#ifdef ONLY64 |
154 |
const vector unsigned int mask = ALTI_MSK64; |
155 |
const vector unsigned char perm_sl = ALTI_SL2_PERM64; |
156 |
const vector unsigned char perm_sr = ALTI_SR2_PERM64; |
157 |
#else |
158 |
const vector unsigned int mask = ALTI_MSK; |
159 |
const vector unsigned char perm_sl = ALTI_SL2_PERM; |
160 |
const vector unsigned char perm_sr = ALTI_SR2_PERM; |
161 |
#endif |
162 |
vector unsigned int v, w, x, y, z; |
163 |
x = vec_perm (a, (vector unsigned int) perm_sl, perm_sl); |
164 |
v = a; |
165 |
y = vec_sr (b, sr1); |
166 |
z = vec_perm (c, (vector unsigned int) perm_sr, perm_sr); |
167 |
w = vec_sl (d, sl1); |
168 |
z = vec_xor (z, w); |
169 |
y = vec_and (y, mask); |
170 |
v = vec_xor (v, x); |
171 |
z = vec_xor (z, y); |
172 |
z = vec_xor (z, v); |
173 |
return z; |
174 |
} |
175 |
|
176 |
/** |
177 |
* This function fills the internal state array with pseudorandom |
178 |
* integers. |
179 |
*/ |
180 |
inline static void |
181 |
gen_rand_all (void) |
182 |
{ |
183 |
int i; |
184 |
vector unsigned int r, r1, r2; |
185 |
|
186 |
r1 = sfmt[N - 2].s; |
187 |
r2 = sfmt[N - 1].s; |
188 |
for (i = 0; i < N - POS1; i++) |
189 |
{ |
190 |
r = vec_recursion (sfmt[i].s, sfmt[i + POS1].s, r1, r2); |
191 |
sfmt[i].s = r; |
192 |
r1 = r2; |
193 |
r2 = r; |
194 |
} |
195 |
for (; i < N; i++) |
196 |
{ |
197 |
r = vec_recursion (sfmt[i].s, sfmt[i + POS1 - N].s, r1, r2); |
198 |
sfmt[i].s = r; |
199 |
r1 = r2; |
200 |
r2 = r; |
201 |
} |
202 |
} |
203 |
|
204 |
/** |
205 |
* This function fills the user-specified array with pseudorandom |
206 |
* integers. |
207 |
* |
208 |
* @param array an 128-bit array to be filled by pseudorandom numbers. |
209 |
* @param size number of 128-bit pesudorandom numbers to be generated. |
210 |
*/ |
211 |
inline static void |
212 |
gen_rand_array (w128_t *array, int size) |
213 |
{ |
214 |
int i, j; |
215 |
vector unsigned int r, r1, r2; |
216 |
|
217 |
r1 = sfmt[N - 2].s; |
218 |
r2 = sfmt[N - 1].s; |
219 |
for (i = 0; i < N - POS1; i++) |
220 |
{ |
221 |
r = vec_recursion (sfmt[i].s, sfmt[i + POS1].s, r1, r2); |
222 |
array[i].s = r; |
223 |
r1 = r2; |
224 |
r2 = r; |
225 |
} |
226 |
for (; i < N; i++) |
227 |
{ |
228 |
r = vec_recursion (sfmt[i].s, array[i + POS1 - N].s, r1, r2); |
229 |
array[i].s = r; |
230 |
r1 = r2; |
231 |
r2 = r; |
232 |
} |
233 |
/* main loop */ |
234 |
for (; i < size - N; i++) |
235 |
{ |
236 |
r = vec_recursion (array[i - N].s, array[i + POS1 - N].s, r1, r2); |
237 |
array[i].s = r; |
238 |
r1 = r2; |
239 |
r2 = r; |
240 |
} |
241 |
for (j = 0; j < 2 * N - size; j++) |
242 |
{ |
243 |
sfmt[j].s = array[j + size - N].s; |
244 |
} |
245 |
for (; i < size; i++) |
246 |
{ |
247 |
r = vec_recursion (array[i - N].s, array[i + POS1 - N].s, r1, r2); |
248 |
array[i].s = r; |
249 |
sfmt[j++].s = r; |
250 |
r1 = r2; |
251 |
r2 = r; |
252 |
} |
253 |
} |
254 |
|
255 |
#ifndef ONLY64 |
256 |
#if defined(__APPLE__) |
257 |
#define ALTI_SWAP (vector unsigned char) \ |
258 |
(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11) |
259 |
#else |
260 |
#define ALTI_SWAP {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11} |
261 |
#endif |
262 |
/** |
263 |
* This function swaps high and low 32-bit of 64-bit integers in user |
264 |
* specified array. |
265 |
* |
266 |
* @param array an 128-bit array to be swaped. |
267 |
* @param size size of 128-bit array. |
268 |
*/ |
269 |
inline static void |
270 |
swap (w128_t *array, int size) |
271 |
{ |
272 |
int i; |
273 |
const vector unsigned char perm = ALTI_SWAP; |
274 |
|
275 |
for (i = 0; i < size; i++) |
276 |
{ |
277 |
array[i].s = vec_perm (array[i].s, (vector unsigned int) perm, perm); |
278 |
} |
279 |
} |
280 |
#endif |
281 |
#elif defined(HAVE_SSE2) |
282 |
inline static __m128i |
283 |
mm_recursion (__m128i *a, __m128i *b, __m128i c, __m128i d, __m128i mask); |
284 |
|
285 |
/** |
286 |
* This function represents the recursion formula. |
287 |
* @param a a 128-bit part of the interal state array |
288 |
* @param b a 128-bit part of the interal state array |
289 |
* @param c a 128-bit part of the interal state array |
290 |
* @param d a 128-bit part of the interal state array |
291 |
* @param mask 128-bit mask |
292 |
* @return output |
293 |
*/ |
294 |
inline static __m128i mm_recursion (__m128i *a, __m128i *b, __m128i c, __m128i d, __m128i mask) |
295 |
{ |
296 |
__m128i v, x, y, z; |
297 |
|
298 |
x = _mm_load_si128 (a); |
299 |
y = _mm_srli_epi32 (*b, SR1); |
300 |
z = _mm_srli_si128 (c, SR2); |
301 |
v = _mm_slli_epi32 (d, SL1); |
302 |
z = _mm_xor_si128 (z, x); |
303 |
z = _mm_xor_si128 (z, v); |
304 |
x = _mm_slli_si128 (x, SL2); |
305 |
y = _mm_and_si128 (y, mask); |
306 |
z = _mm_xor_si128 (z, x); |
307 |
z = _mm_xor_si128 (z, y); |
308 |
return z; |
309 |
} |
310 |
|
311 |
/** |
312 |
* This function fills the internal state array with pseudorandom |
313 |
* integers. |
314 |
*/ |
315 |
inline static void |
316 |
gen_rand_all (void) |
317 |
{ |
318 |
int i; |
319 |
__m128i r, r1, r2, mask; |
320 |
mask = _mm_set_epi32 (MSK4, MSK3, MSK2, MSK1); |
321 |
|
322 |
r1 = _mm_load_si128 (&sfmt[N - 2].si); |
323 |
r2 = _mm_load_si128 (&sfmt[N - 1].si); |
324 |
for (i = 0; i < N - POS1; i++) |
325 |
{ |
326 |
r = mm_recursion (&sfmt[i].si, &sfmt[i + POS1].si, r1, r2, mask); |
327 |
_mm_store_si128 (&sfmt[i].si, r); |
328 |
r1 = r2; |
329 |
r2 = r; |
330 |
} |
331 |
for (; i < N; i++) |
332 |
{ |
333 |
r = mm_recursion (&sfmt[i].si, &sfmt[i + POS1 - N].si, r1, r2, mask); |
334 |
_mm_store_si128 (&sfmt[i].si, r); |
335 |
r1 = r2; |
336 |
r2 = r; |
337 |
} |
338 |
} |
339 |
|
340 |
/** |
341 |
* This function fills the user-specified array with pseudorandom |
342 |
* integers. |
343 |
* |
344 |
* @param array an 128-bit array to be filled by pseudorandom numbers. |
345 |
* @param size number of 128-bit pesudorandom numbers to be generated. |
346 |
*/ |
347 |
inline static void |
348 |
gen_rand_array (w128_t *array, int size) |
349 |
{ |
350 |
int i, j; |
351 |
__m128i r, r1, r2, mask; |
352 |
mask = _mm_set_epi32 (MSK4, MSK3, MSK2, MSK1); |
353 |
|
354 |
r1 = _mm_load_si128 (&sfmt[N - 2].si); |
355 |
r2 = _mm_load_si128 (&sfmt[N - 1].si); |
356 |
for (i = 0; i < N - POS1; i++) |
357 |
{ |
358 |
r = mm_recursion (&sfmt[i].si, &sfmt[i + POS1].si, r1, r2, mask); |
359 |
_mm_store_si128 (&array[i].si, r); |
360 |
r1 = r2; |
361 |
r2 = r; |
362 |
} |
363 |
for (; i < N; i++) |
364 |
{ |
365 |
r = mm_recursion (&sfmt[i].si, &array[i + POS1 - N].si, r1, r2, mask); |
366 |
_mm_store_si128 (&array[i].si, r); |
367 |
r1 = r2; |
368 |
r2 = r; |
369 |
} |
370 |
/* main loop */ |
371 |
for (; i < size - N; i++) |
372 |
{ |
373 |
r = mm_recursion (&array[i - N].si, &array[i + POS1 - N].si, r1, r2, mask); |
374 |
_mm_store_si128 (&array[i].si, r); |
375 |
r1 = r2; |
376 |
r2 = r; |
377 |
} |
378 |
for (j = 0; j < 2 * N - size; j++) |
379 |
{ |
380 |
r = _mm_load_si128 (&array[j + size - N].si); |
381 |
_mm_store_si128 (&sfmt[j].si, r); |
382 |
} |
383 |
for (; i < size; i++) |
384 |
{ |
385 |
r = mm_recursion (&array[i - N].si, &array[i + POS1 - N].si, r1, r2, mask); |
386 |
_mm_store_si128 (&array[i].si, r); |
387 |
_mm_store_si128 (&sfmt[j++].si, r); |
388 |
r1 = r2; |
389 |
r2 = r; |
390 |
} |
391 |
} |
392 |
#endif |
393 |
|
394 |
/** |
395 |
* This function simulate a 64-bit index of LITTLE ENDIAN |
396 |
* in BIG ENDIAN machine. |
397 |
*/ |
398 |
#ifdef ONLY64 |
399 |
inline static int |
400 |
idxof (int i) |
401 |
{ |
402 |
return i ^ 1; |
403 |
} |
404 |
#else |
405 |
inline static int |
406 |
idxof (int i) |
407 |
{ |
408 |
return i; |
409 |
} |
410 |
#endif |
411 |
/** |
412 |
* This function simulates SIMD 128-bit right shift by the standard C. |
413 |
* The 128-bit integer given in in is shifted by (shift * 8) bits. |
414 |
* This function simulates the LITTLE ENDIAN SIMD. |
415 |
* @param out the output of this function |
416 |
* @param in the 128-bit data to be shifted |
417 |
* @param shift the shift value |
418 |
*/ |
419 |
#ifdef ONLY64 |
420 |
inline static void |
421 |
rshift128 (w128_t *out, w128_t const *in, int shift) |
422 |
{ |
423 |
uint64_t th, tl, oh, ol; |
424 |
|
425 |
th = ((uint64_t) in->u[2] << 32) | ((uint64_t) in->u[3]); |
426 |
tl = ((uint64_t) in->u[0] << 32) | ((uint64_t) in->u[1]); |
427 |
|
428 |
oh = th >> (shift * 8); |
429 |
ol = tl >> (shift * 8); |
430 |
ol |= th << (64 - shift * 8); |
431 |
out->u[0] = (uint32_t) (ol >> 32); |
432 |
out->u[1] = (uint32_t) ol; |
433 |
out->u[2] = (uint32_t) (oh >> 32); |
434 |
out->u[3] = (uint32_t) oh; |
435 |
} |
436 |
#else |
437 |
inline static void |
438 |
rshift128 (w128_t *out, w128_t const *in, int shift) |
439 |
{ |
440 |
uint64_t th, tl, oh, ol; |
441 |
|
442 |
th = ((uint64_t) in->u[3] << 32) | ((uint64_t) in->u[2]); |
443 |
tl = ((uint64_t) in->u[1] << 32) | ((uint64_t) in->u[0]); |
444 |
|
445 |
oh = th >> (shift * 8); |
446 |
ol = tl >> (shift * 8); |
447 |
ol |= th << (64 - shift * 8); |
448 |
out->u[1] = (uint32_t) (ol >> 32); |
449 |
out->u[0] = (uint32_t) ol; |
450 |
out->u[3] = (uint32_t) (oh >> 32); |
451 |
out->u[2] = (uint32_t) oh; |
452 |
} |
453 |
#endif |
454 |
/** |
455 |
* This function simulates SIMD 128-bit left shift by the standard C. |
456 |
* The 128-bit integer given in in is shifted by (shift * 8) bits. |
457 |
* This function simulates the LITTLE ENDIAN SIMD. |
458 |
* @param out the output of this function |
459 |
* @param in the 128-bit data to be shifted |
460 |
* @param shift the shift value |
461 |
*/ |
462 |
#ifdef ONLY64 |
463 |
inline static void |
464 |
lshift128 (w128_t *out, w128_t const *in, int shift) |
465 |
{ |
466 |
uint64_t th, tl, oh, ol; |
467 |
|
468 |
th = ((uint64_t) in->u[2] << 32) | ((uint64_t) in->u[3]); |
469 |
tl = ((uint64_t) in->u[0] << 32) | ((uint64_t) in->u[1]); |
470 |
|
471 |
oh = th << (shift * 8); |
472 |
ol = tl << (shift * 8); |
473 |
oh |= tl >> (64 - shift * 8); |
474 |
out->u[0] = (uint32_t) (ol >> 32); |
475 |
out->u[1] = (uint32_t) ol; |
476 |
out->u[2] = (uint32_t) (oh >> 32); |
477 |
out->u[3] = (uint32_t) oh; |
478 |
} |
479 |
#else |
480 |
inline static void |
481 |
lshift128 (w128_t *out, w128_t const *in, int shift) |
482 |
{ |
483 |
uint64_t th, tl, oh, ol; |
484 |
|
485 |
th = ((uint64_t) in->u[3] << 32) | ((uint64_t) in->u[2]); |
486 |
tl = ((uint64_t) in->u[1] << 32) | ((uint64_t) in->u[0]); |
487 |
|
488 |
oh = th << (shift * 8); |
489 |
ol = tl << (shift * 8); |
490 |
oh |= tl >> (64 - shift * 8); |
491 |
out->u[1] = (uint32_t) (ol >> 32); |
492 |
out->u[0] = (uint32_t) ol; |
493 |
out->u[3] = (uint32_t) (oh >> 32); |
494 |
out->u[2] = (uint32_t) oh; |
495 |
} |
496 |
#endif |
497 |
|
498 |
/** |
499 |
* This function represents the recursion formula. |
500 |
* @param r output |
501 |
* @param a a 128-bit part of the internal state array |
502 |
* @param b a 128-bit part of the internal state array |
503 |
* @param c a 128-bit part of the internal state array |
504 |
* @param d a 128-bit part of the internal state array |
505 |
*/ |
506 |
#ifdef ONLY64 |
507 |
inline static void |
508 |
do_recursion (w128_t *r, w128_t *a, w128_t *b, w128_t *c, w128_t *d) |
509 |
{ |
510 |
w128_t x; |
511 |
w128_t y; |
512 |
|
513 |
lshift128 (&x, a, SL2); |
514 |
rshift128 (&y, c, SR2); |
515 |
r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK2) ^ y.u[0] ^ (d->u[0] << SL1); |
516 |
r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK1) ^ y.u[1] ^ (d->u[1] << SL1); |
517 |
r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK4) ^ y.u[2] ^ (d->u[2] << SL1); |
518 |
r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK3) ^ y.u[3] ^ (d->u[3] << SL1); |
519 |
} |
520 |
#else |
521 |
inline static void |
522 |
do_recursion (w128_t *r, w128_t *a, w128_t *b, w128_t *c, w128_t *d) |
523 |
{ |
524 |
w128_t x; |
525 |
w128_t y; |
526 |
|
527 |
lshift128 (&x, a, SL2); |
528 |
rshift128 (&y, c, SR2); |
529 |
r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK1) ^ y.u[0] ^ (d->u[0] << SL1); |
530 |
r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK2) ^ y.u[1] ^ (d->u[1] << SL1); |
531 |
r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK3) ^ y.u[2] ^ (d->u[2] << SL1); |
532 |
r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK4) ^ y.u[3] ^ (d->u[3] << SL1); |
533 |
} |
534 |
#endif |
535 |
|
536 |
#if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) |
537 |
/** |
538 |
* This function fills the internal state array with pseudorandom |
539 |
* integers. |
540 |
*/ |
541 |
inline static void |
542 |
gen_rand_all (void) |
543 |
{ |
544 |
int i; |
545 |
w128_t *r1, *r2; |
546 |
|
547 |
r1 = &sfmt[N - 2]; |
548 |
r2 = &sfmt[N - 1]; |
549 |
for (i = 0; i < N - POS1; i++) |
550 |
{ |
551 |
do_recursion (&sfmt[i], &sfmt[i], &sfmt[i + POS1], r1, r2); |
552 |
r1 = r2; |
553 |
r2 = &sfmt[i]; |
554 |
} |
555 |
for (; i < N; i++) |
556 |
{ |
557 |
do_recursion (&sfmt[i], &sfmt[i], &sfmt[i + POS1 - N], r1, r2); |
558 |
r1 = r2; |
559 |
r2 = &sfmt[i]; |
560 |
} |
561 |
} |
562 |
|
563 |
/** |
564 |
* This function fills the user-specified array with pseudorandom |
565 |
* integers. |
566 |
* |
567 |
* @param array an 128-bit array to be filled by pseudorandom numbers. |
568 |
* @param size number of 128-bit pseudorandom numbers to be generated. |
569 |
*/ |
570 |
inline static void |
571 |
gen_rand_array (w128_t *array, int size) |
572 |
{ |
573 |
int i, j; |
574 |
w128_t *r1, *r2; |
575 |
|
576 |
r1 = &sfmt[N - 2]; |
577 |
r2 = &sfmt[N - 1]; |
578 |
for (i = 0; i < N - POS1; i++) |
579 |
{ |
580 |
do_recursion (&array[i], &sfmt[i], &sfmt[i + POS1], r1, r2); |
581 |
r1 = r2; |
582 |
r2 = &array[i]; |
583 |
} |
584 |
for (; i < N; i++) |
585 |
{ |
586 |
do_recursion (&array[i], &sfmt[i], &array[i + POS1 - N], r1, r2); |
587 |
r1 = r2; |
588 |
r2 = &array[i]; |
589 |
} |
590 |
for (; i < size - N; i++) |
591 |
{ |
592 |
do_recursion (&array[i], &array[i - N], &array[i + POS1 - N], r1, r2); |
593 |
r1 = r2; |
594 |
r2 = &array[i]; |
595 |
} |
596 |
for (j = 0; j < 2 * N - size; j++) |
597 |
{ |
598 |
sfmt[j] = array[j + size - N]; |
599 |
} |
600 |
for (; i < size; i++, j++) |
601 |
{ |
602 |
do_recursion (&array[i], &array[i - N], &array[i + POS1 - N], r1, r2); |
603 |
r1 = r2; |
604 |
r2 = &array[i]; |
605 |
sfmt[j] = array[i]; |
606 |
} |
607 |
} |
608 |
#endif |
609 |
|
610 |
#if defined(BIG_ENDIAN64) && !defined(ONLY64) && !defined(HAVE_ALTIVEC) |
611 |
inline static void |
612 |
swap (w128_t *array, int size) |
613 |
{ |
614 |
int i; |
615 |
uint32_t x, y; |
616 |
|
617 |
for (i = 0; i < size; i++) |
618 |
{ |
619 |
x = array[i].u[0]; |
620 |
y = array[i].u[2]; |
621 |
array[i].u[0] = array[i].u[1]; |
622 |
array[i].u[2] = array[i].u[3]; |
623 |
array[i].u[1] = x; |
624 |
array[i].u[3] = y; |
625 |
} |
626 |
} |
627 |
#endif |
628 |
/** |
629 |
* This function represents a function used in the initialization |
630 |
* by init_by_array |
631 |
* @param x 32-bit integer |
632 |
* @return 32-bit integer |
633 |
*/ |
634 |
inline static uint32_t |
635 |
func1 (uint32_t x) |
636 |
{ |
637 |
return (x ^ (x >> 27)) * (uint32_t) 1664525UL; |
638 |
} |
639 |
|
640 |
/** |
641 |
* This function represents a function used in the initialization |
642 |
* by init_by_array |
643 |
* @param x 32-bit integer |
644 |
* @return 32-bit integer |
645 |
*/ |
646 |
inline static uint32_t |
647 |
func2 (uint32_t x) |
648 |
{ |
649 |
return (x ^ (x >> 27)) * (uint32_t) 1566083941UL; |
650 |
} |
651 |
|
652 |
/** |
653 |
* This function certificate the period of 2^{MEXP} |
654 |
*/ |
655 |
static void |
656 |
period_certification (void) |
657 |
{ |
658 |
int inner = 0; |
659 |
int i, j; |
660 |
uint32_t work; |
661 |
|
662 |
for (i = 0; i < 4; i++) |
663 |
inner ^= psfmt32[idxof (i)] & parity[i]; |
664 |
for (i = 16; i > 0; i >>= 1) |
665 |
inner ^= inner >> i; |
666 |
inner &= 1; |
667 |
/* check OK */ |
668 |
if (inner == 1) |
669 |
{ |
670 |
return; |
671 |
} |
672 |
/* check NG, and modification */ |
673 |
for (i = 0; i < 4; i++) |
674 |
{ |
675 |
work = 1; |
676 |
for (j = 0; j < 32; j++) |
677 |
{ |
678 |
if ((work & parity[i]) != 0) |
679 |
{ |
680 |
psfmt32[idxof (i)] ^= work; |
681 |
return; |
682 |
} |
683 |
work = work << 1; |
684 |
} |
685 |
} |
686 |
} |
687 |
|
688 |
/*---------------- |
689 |
PUBLIC FUNCTIONS |
690 |
----------------*/ |
691 |
/** |
692 |
* This function returns the identification string. |
693 |
* The string shows the word size, the Mersenne exponent, |
694 |
* and all parameters of this generator. |
695 |
*/ |
696 |
char const * const |
697 |
get_idstring (void) |
698 |
{ |
699 |
return IDSTR; |
700 |
} |
701 |
|
702 |
/** |
703 |
* This function returns the minimum size of array used for \b |
704 |
* fill_array32() function. |
705 |
* @return minimum size of array used for fill_array32() function. |
706 |
*/ |
707 |
int |
708 |
get_min_array_size32 (void) |
709 |
{ |
710 |
return N32; |
711 |
} |
712 |
|
713 |
/** |
714 |
* This function returns the minimum size of array used for \b |
715 |
* fill_array64() function. |
716 |
* @return minimum size of array used for fill_array64() function. |
717 |
*/ |
718 |
int |
719 |
get_min_array_size64 (void) |
720 |
{ |
721 |
return N64; |
722 |
} |
723 |
|
724 |
#ifndef ONLY64 |
725 |
/** |
726 |
* This function generates and returns 32-bit pseudorandom number. |
727 |
* init_gen_rand or init_by_array must be called before this function. |
728 |
* @return 32-bit pseudorandom number |
729 |
*/ |
730 |
uint32_t |
731 |
gen_rand32 (void) |
732 |
{ |
733 |
uint32_t r; |
734 |
|
735 |
assert (initialized); |
736 |
if (idx >= N32) |
737 |
{ |
738 |
gen_rand_all (); |
739 |
idx = 0; |
740 |
} |
741 |
r = psfmt32[idx++]; |
742 |
return r; |
743 |
} |
744 |
#endif |
745 |
/** |
746 |
* This function generates and returns 64-bit pseudorandom number. |
747 |
* init_gen_rand or init_by_array must be called before this function. |
748 |
* The function gen_rand64 should not be called after gen_rand32, |
749 |
* unless an initialization is again executed. |
750 |
* @return 64-bit pseudorandom number |
751 |
*/ |
752 |
uint64_t |
753 |
gen_rand64 (void) |
754 |
{ |
755 |
#if defined(BIG_ENDIAN64) && !defined(ONLY64) |
756 |
uint32_t r1, r2; |
757 |
#else |
758 |
uint64_t r; |
759 |
#endif |
760 |
|
761 |
assert (initialized); |
762 |
assert (idx % 2 == 0); |
763 |
|
764 |
if (idx >= N32) |
765 |
{ |
766 |
gen_rand_all (); |
767 |
idx = 0; |
768 |
} |
769 |
#if defined(BIG_ENDIAN64) && !defined(ONLY64) |
770 |
r1 = psfmt32[idx]; |
771 |
r2 = psfmt32[idx + 1]; |
772 |
idx += 2; |
773 |
return ((uint64_t) r2 << 32) | r1; |
774 |
#else |
775 |
r = psfmt64[idx / 2]; |
776 |
idx += 2; |
777 |
return r; |
778 |
#endif |
779 |
} |
780 |
|
781 |
#ifndef ONLY64 |
782 |
/** |
783 |
* This function generates pseudorandom 32-bit integers in the |
784 |
* specified array[] by one call. The number of pseudorandom integers |
785 |
* is specified by the argument size, which must be at least 624 and a |
786 |
* multiple of four. The generation by this function is much faster |
787 |
* than the following gen_rand function. |
788 |
* |
789 |
* For initialization, init_gen_rand or init_by_array must be called |
790 |
* before the first call of this function. This function can not be |
791 |
* used after calling gen_rand function, without initialization. |
792 |
* |
793 |
* @param array an array where pseudorandom 32-bit integers are filled |
794 |
* by this function. The pointer to the array must be \b "aligned" |
795 |
* (namely, must be a multiple of 16) in the SIMD version, since it |
796 |
* refers to the address of a 128-bit integer. In the standard C |
797 |
* version, the pointer is arbitrary. |
798 |
* |
799 |
* @param size the number of 32-bit pseudorandom integers to be |
800 |
* generated. size must be a multiple of 4, and greater than or equal |
801 |
* to (MEXP / 128 + 1) * 4. |
802 |
* |
803 |
* @note \b memalign or \b posix_memalign is available to get aligned |
804 |
* memory. Mac OSX doesn't have these functions, but \b malloc of OSX |
805 |
* returns the pointer to the aligned memory block. |
806 |
*/ |
807 |
void |
808 |
fill_array32 (uint32_t * array, int size) |
809 |
{ |
810 |
assert (initialized); |
811 |
assert (idx == N32); |
812 |
assert (size % 4 == 0); |
813 |
assert (size >= N32); |
814 |
|
815 |
gen_rand_array (reinterpret_cast<w128_t *> (array), size / 4); |
816 |
idx = N32; |
817 |
} |
818 |
#endif |
819 |
|
820 |
/** |
821 |
* This function generates pseudorandom 64-bit integers in the |
822 |
* specified array[] by one call. The number of pseudorandom integers |
823 |
* is specified by the argument size, which must be at least 312 and a |
824 |
* multiple of two. The generation by this function is much faster |
825 |
* than the following gen_rand function. |
826 |
* |
827 |
* For initialization, init_gen_rand or init_by_array must be called |
828 |
* before the first call of this function. This function can not be |
829 |
* used after calling gen_rand function, without initialization. |
830 |
* |
831 |
* @param array an array where pseudorandom 64-bit integers are filled |
832 |
* by this function. The pointer to the array must be "aligned" |
833 |
* (namely, must be a multiple of 16) in the SIMD version, since it |
834 |
* refers to the address of a 128-bit integer. In the standard C |
835 |
* version, the pointer is arbitrary. |
836 |
* |
837 |
* @param size the number of 64-bit pseudorandom integers to be |
838 |
* generated. size must be a multiple of 2, and greater than or equal |
839 |
* to (MEXP / 128 + 1) * 2 |
840 |
* |
841 |
* @note \b memalign or \b posix_memalign is available to get aligned |
842 |
* memory. Mac OSX doesn't have these functions, but \b malloc of OSX |
843 |
* returns the pointer to the aligned memory block. |
844 |
*/ |
845 |
void |
846 |
fill_array64 (uint64_t * array, int size) |
847 |
{ |
848 |
assert (initialized); |
849 |
assert (idx == N32); |
850 |
assert (size % 2 == 0); |
851 |
assert (size >= N64); |
852 |
|
853 |
gen_rand_array (reinterpret_cast<w128_t *> (array), size / 2); |
854 |
idx = N32; |
855 |
|
856 |
#if defined(BIG_ENDIAN64) && !defined(ONLY64) |
857 |
swap (reinterpret_cast<w128_t *> (array), size / 2); |
858 |
#endif |
859 |
} |
860 |
|
861 |
/** |
862 |
* This function initializes the internal state array with a 32-bit |
863 |
* integer seed. |
864 |
* |
865 |
* @param seed a 32-bit integer used as the seed. |
866 |
*/ |
867 |
void |
868 |
init_gen_rand (uint32_t seed) |
869 |
{ |
870 |
int i; |
871 |
|
872 |
psfmt32[idxof (0)] = seed; |
873 |
for (i = 1; i < N32; i++) |
874 |
{ |
875 |
psfmt32[idxof (i)] = 1812433253UL * (psfmt32[idxof (i - 1)] ^ (psfmt32[idxof (i - 1)] >> 30)) + i; |
876 |
} |
877 |
idx = N32; |
878 |
period_certification (); |
879 |
initialized = 1; |
880 |
} |
881 |
|
882 |
/** |
883 |
* This function initializes the internal state array, |
884 |
* with an array of 32-bit integers used as the seeds |
885 |
* @param init_key the array of 32-bit integers, used as a seed. |
886 |
* @param key_length the length of init_key. |
887 |
*/ |
888 |
void |
889 |
init_by_array (uint32_t * init_key, int key_length) |
890 |
{ |
891 |
int i, j, count; |
892 |
uint32_t r; |
893 |
int lag; |
894 |
int mid; |
895 |
int size = N * 4; |
896 |
|
897 |
if (size >= 623) |
898 |
{ |
899 |
lag = 11; |
900 |
} |
901 |
else if (size >= 68) |
902 |
{ |
903 |
lag = 7; |
904 |
} |
905 |
else if (size >= 39) |
906 |
{ |
907 |
lag = 5; |
908 |
} |
909 |
else |
910 |
{ |
911 |
lag = 3; |
912 |
} |
913 |
mid = (size - lag) / 2; |
914 |
|
915 |
memset (sfmt, 0x8b, sizeof (sfmt)); |
916 |
if (key_length + 1 > N32) |
917 |
{ |
918 |
count = key_length + 1; |
919 |
} |
920 |
else |
921 |
{ |
922 |
count = N32; |
923 |
} |
924 |
r = func1 (psfmt32[idxof (0)] ^ psfmt32[idxof (mid)] ^ psfmt32[idxof (N32 - 1)]); |
925 |
psfmt32[idxof (mid)] += r; |
926 |
r += key_length; |
927 |
psfmt32[idxof (mid + lag)] += r; |
928 |
psfmt32[idxof (0)] = r; |
929 |
|
930 |
count--; |
931 |
for (i = 1, j = 0; (j < count) && (j < key_length); j++) |
932 |
{ |
933 |
r = func1 (psfmt32[idxof (i)] ^ psfmt32[idxof ((i + mid) % N32)] ^ psfmt32[idxof ((i + N32 - 1) % N32)]); |
934 |
psfmt32[idxof ((i + mid) % N32)] += r; |
935 |
r += init_key[j] + i; |
936 |
psfmt32[idxof ((i + mid + lag) % N32)] += r; |
937 |
psfmt32[idxof (i)] = r; |
938 |
i = (i + 1) % N32; |
939 |
} |
940 |
for (; j < count; j++) |
941 |
{ |
942 |
r = func1 (psfmt32[idxof (i)] ^ psfmt32[idxof ((i + mid) % N32)] ^ psfmt32[idxof ((i + N32 - 1) % N32)]); |
943 |
psfmt32[idxof ((i + mid) % N32)] += r; |
944 |
r += i; |
945 |
psfmt32[idxof ((i + mid + lag) % N32)] += r; |
946 |
psfmt32[idxof (i)] = r; |
947 |
i = (i + 1) % N32; |
948 |
} |
949 |
for (j = 0; j < N32; j++) |
950 |
{ |
951 |
r = func2 (psfmt32[idxof (i)] + psfmt32[idxof ((i + mid) % N32)] + psfmt32[idxof ((i + N32 - 1) % N32)]); |
952 |
psfmt32[idxof ((i + mid) % N32)] ^= r; |
953 |
r -= i; |
954 |
psfmt32[idxof ((i + mid + lag) % N32)] ^= r; |
955 |
psfmt32[idxof (i)] = r; |
956 |
i = (i + 1) % N32; |
957 |
} |
958 |
|
959 |
idx = N32; |
960 |
period_certification (); |
961 |
initialized = 1; |
962 |
} |