/build/rocrand-7S8maf/rocrand-7.1.1/library/include/rocrand/rocrand_mtgp32.h Source File

/build/rocrand-7S8maf/rocrand-7.1.1/library/include/rocrand/rocrand_mtgp32.h Source File#

API library: /build/rocrand-7S8maf/rocrand-7.1.1/library/include/rocrand/rocrand_mtgp32.h Source File
rocrand_mtgp32.h
1// Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21/*
22 * Copyright (c) 2009, 2010 Mutsuo Saito, Makoto Matsumoto and Hiroshima
23 * University. All rights reserved.
24 * Copyright (c) 2011 Mutsuo Saito, Makoto Matsumoto, Hiroshima
25 * University and University of Tokyo. All rights reserved.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions are
29 * met:
30 *
31 * * Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * * Redistributions in binary form must reproduce the above
34 * copyright notice, this list of conditions and the following
35 * disclaimer in the documentation and/or other materials provided
36 * with the distribution.
37 * * Neither the name of the Hiroshima University nor the names of
38 * its contributors may be used to endorse or promote products
39 * derived from this software without specific prior written
40 * permission.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
43 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
44 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
45 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
46 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
47 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
48 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
49 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
50 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
51 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
52 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53 */
54
55#ifndef ROCRAND_MTGP32_H_
56#define ROCRAND_MTGP32_H_
57
58#include "rocrand/rocrand.h"
59
60#include <hip/hip_runtime.h>
61
62#include <stdlib.h>
63#include <string.h>
64
65#define MTGP_MEXP 11213
66#define MTGP_N 351
67#define MTGP_FLOOR_2P 256
68#define MTGP_CEIL_2P 512
69#define MTGP_TN MTGP_FLOOR_2P
70#define MTGP_LS (MTGP_TN * 3)
71#define MTGP_BN_MAX 512
72#define MTGP_TS 16
73#define MTGP_STATE 1024
74#define MTGP_MASK 1023
75
76// Source: https://github.com/MersenneTwister-Lab/MTGP/blob/master/mtgp32-fast.h
98 int mexp;
99 int pos;
100 int sh1;
101 int sh2;
102 uint32_t tbl[16];
103 uint32_t tmp_tbl[16];
104 uint32_t flt_tmp_tbl[16];
105 uint32_t mask;
106 unsigned char poly_sha1[21];
107};
108
109namespace rocrand_device {
110
111struct mtgp32_params
112{
113 unsigned int pos_tbl[MTGP_BN_MAX];
114 unsigned int param_tbl[MTGP_BN_MAX][MTGP_TS];
115 unsigned int temper_tbl[MTGP_BN_MAX][MTGP_TS];
116 unsigned int single_temper_tbl[MTGP_BN_MAX][MTGP_TS];
117 unsigned int sh1_tbl[MTGP_BN_MAX];
118 unsigned int sh2_tbl[MTGP_BN_MAX];
119 unsigned int mask[1];
120};
121
122typedef mtgp32_params_fast_t mtgp32_fast_params;
123
124struct mtgp32_state
125{
126 int offset;
127 int id;
128 unsigned int status[MTGP_STATE];
129};
130
131inline
132void rocrand_mtgp32_init_state(unsigned int array[],
133 const mtgp32_fast_params *para, unsigned int seed)
134{
135 int i;
136 int size = para->mexp / 32 + 1;
137 unsigned int hidden_seed;
138 unsigned int tmp;
139 hidden_seed = para->tbl[4] ^ (para->tbl[8] << 16);
140 tmp = hidden_seed;
141 tmp += tmp >> 16;
142 tmp += tmp >> 8;
143 memset(array, tmp & 0xff, sizeof(unsigned int) * size);
144 array[0] = seed;
145 array[1] = hidden_seed;
146 for (i = 1; i < size; i++)
147 array[i] ^= (1812433253) * (array[i - 1] ^ (array[i - 1] >> 30)) + i;
148}
149
150class mtgp32_engine
151{
152public:
153 __forceinline__ __device__ __host__
154 // Initialization is not supported for __shared__ variables
155 mtgp32_engine() // cppcheck-suppress uninitMemberVar
156 {
157
158 }
159
160 __forceinline__ __device__ __host__ mtgp32_engine(const mtgp32_state& m_state,
161 const mtgp32_params* params,
162 int bid)
163 {
164 this->m_state = m_state;
165 pos_tbl = params->pos_tbl[bid];
166 sh1_tbl = params->sh1_tbl[bid];
167 sh2_tbl = params->sh2_tbl[bid];
168 mask = params->mask[0];
169 for (int j = 0; j < MTGP_TS; j++) {
170 param_tbl[j] = params->param_tbl[bid][j];
171 temper_tbl[j] = params->temper_tbl[bid][j];
172 single_temper_tbl[j] = params->single_temper_tbl[bid][j];
173 }
174 }
175
176 __forceinline__ __device__ __host__ void copy(const mtgp32_engine* m_engine)
177 {
178#if defined(__HIP_DEVICE_COMPILE__)
179 const unsigned int thread_id = threadIdx.x;
180 for(int i = thread_id; i < MTGP_STATE; i += blockDim.x)
181 m_state.status[i] = m_engine->m_state.status[i];
182
183 if (thread_id == 0)
184 {
185 m_state.offset = m_engine->m_state.offset;
186 m_state.id = m_engine->m_state.id;
187 pos_tbl = m_engine->pos_tbl;
188 sh1_tbl = m_engine->sh1_tbl;
189 sh2_tbl = m_engine->sh2_tbl;
190 mask = m_engine->mask;
191 }
192 if (thread_id < MTGP_TS)
193 {
194 param_tbl[thread_id] = m_engine->param_tbl[thread_id];
195 temper_tbl[thread_id] = m_engine->temper_tbl[thread_id];
196 single_temper_tbl[thread_id] = m_engine->single_temper_tbl[thread_id];
197 }
198 __syncthreads();
199#else
200 this->m_state = m_engine->m_state;
201 pos_tbl = m_engine->pos_tbl;
202 sh1_tbl = m_engine->sh1_tbl;
203 sh2_tbl = m_engine->sh2_tbl;
204 mask = m_engine->mask;
205 for (int j = 0; j < MTGP_TS; j++) {
206 param_tbl[j] = m_engine->param_tbl[j];
207 temper_tbl[j] = m_engine->temper_tbl[j];
208 single_temper_tbl[j] = m_engine->single_temper_tbl[j];
209 }
210#endif
211 }
212
213 __forceinline__ __device__ __host__ void set_params(mtgp32_params* params)
214 {
215 pos_tbl = params->pos_tbl[m_state.id];
216 sh1_tbl = params->sh1_tbl[m_state.id];
217 sh2_tbl = params->sh2_tbl[m_state.id];
218 mask = params->mask[0];
219 for (int j = 0; j < MTGP_TS; j++) {
220 param_tbl[j] = params->param_tbl[m_state.id][j];
221 temper_tbl[j] = params->temper_tbl[m_state.id][j];
222 single_temper_tbl[j] = params->single_temper_tbl[m_state.id][j];
223 }
224 }
225
226 __forceinline__ __device__ __host__ unsigned int operator()()
227 {
228 return this->next();
229 }
230
231 __forceinline__ __device__ __host__ unsigned int next()
232 {
233#ifdef __HIP_DEVICE_COMPILE__
234 unsigned int o = next_thread(threadIdx.x);
235 __syncthreads();
236 if(threadIdx.x == 0)
237 {
238 m_state.offset = (m_state.offset + blockDim.x) & MTGP_MASK;
239 }
240 __syncthreads();
241 return o;
242#else
243 return 0;
244#endif
245 }
246
247 __forceinline__ __device__ __host__ unsigned int next_single()
248 {
249#if defined(__HIP_DEVICE_COMPILE__)
250 unsigned int t = threadIdx.x;
251 unsigned int d = blockDim.x;
252 int pos = pos_tbl;
253 unsigned int r;
254 unsigned int o;
255
256 r = para_rec(m_state.status[(t + m_state.offset) & MTGP_MASK],
257 m_state.status[(t + m_state.offset + 1) & MTGP_MASK],
258 m_state.status[(t + m_state.offset + pos) & MTGP_MASK]);
259 m_state.status[(t + m_state.offset + MTGP_N) & MTGP_MASK] = r;
260
261 o = temper_single(r, m_state.status[(t + m_state.offset + pos - 1) & MTGP_MASK]);
262 __syncthreads();
263 if (t == 0)
264 m_state.offset = (m_state.offset + d) & MTGP_MASK;
265 __syncthreads();
266 return o;
267#else
268 return 0;
269#endif
270 }
271
272private:
273 __forceinline__ __device__ __host__ unsigned int
274 para_rec(unsigned int X1, unsigned int X2, unsigned int Y) const
275 {
276 unsigned int X = (X1 & mask) ^ X2;
277 unsigned int MAT;
278
279 X ^= X << sh1_tbl;
280 Y = X ^ (Y >> sh2_tbl);
281 MAT = param_tbl[Y & 0x0f];
282 return Y ^ MAT;
283 }
284
285 __forceinline__ __device__ __host__ unsigned int temper(unsigned int V, unsigned int T) const
286 {
287 unsigned int MAT;
288
289 T ^= T >> 16;
290 T ^= T >> 8;
291 MAT = temper_tbl[T & 0x0f];
292 return V ^ MAT;
293 }
294
295 __forceinline__ __device__ __host__ unsigned int temper_single(unsigned int V,
296 unsigned int T) const
297 {
298 unsigned int MAT;
299 unsigned int r;
300
301 T ^= T >> 16;
302 T ^= T >> 8;
303 MAT = single_temper_tbl[T & 0x0f];
304 r = (V >> 9) ^ MAT;
305 return r;
306 }
307
308protected:
311 __forceinline__ __device__ __host__ unsigned int next_thread(unsigned int thread_idx)
312 {
313 const unsigned int r
314 = para_rec(m_state.status[(thread_idx + m_state.offset) & MTGP_MASK],
315 m_state.status[(thread_idx + m_state.offset + 1) & MTGP_MASK],
316 m_state.status[(thread_idx + m_state.offset + pos_tbl) & MTGP_MASK]);
317 m_state.status[(thread_idx + m_state.offset + MTGP_N) & MTGP_MASK] = r;
318 return temper(r, m_state.status[(thread_idx + m_state.offset + pos_tbl - 1) & MTGP_MASK]);
319 }
320
321public:
322 // State
323 mtgp32_state m_state;
324 // Parameters
325 unsigned int pos_tbl;
326 unsigned int param_tbl[MTGP_TS];
327 unsigned int temper_tbl[MTGP_TS];
328 unsigned int sh1_tbl;
329 unsigned int sh2_tbl;
330 unsigned int single_temper_tbl[MTGP_TS];
331 unsigned int mask;
332
333}; // mtgp32_engine class
334
335} // end namespace rocrand_device
336
341
343typedef rocrand_device::mtgp32_engine rocrand_state_mtgp32;
344typedef rocrand_device::mtgp32_state mtgp32_state;
345typedef rocrand_device::mtgp32_fast_params mtgp32_fast_params;
346typedef rocrand_device::mtgp32_params mtgp32_params;
348
364__host__
365inline rocrand_status rocrand_make_state_mtgp32(rocrand_state_mtgp32* state,
366 mtgp32_fast_params params[],
367 int n,
368 unsigned long long seed)
369{
370 int i;
371 rocrand_state_mtgp32 * h_state = (rocrand_state_mtgp32 *) malloc(sizeof(rocrand_state_mtgp32) * n);
372 seed = seed ^ (seed >> 32);
373
374 if (h_state == NULL)
376
377 for (i = 0; i < n; i++) {
378 rocrand_device::rocrand_mtgp32_init_state(&(h_state[i].m_state.status[0]), &params[i], (unsigned int)seed + i + 1);
379 h_state[i].m_state.offset = 0;
380 h_state[i].m_state.id = i;
381 h_state[i].pos_tbl = params[i].pos;
382 h_state[i].sh1_tbl = params[i].sh1;
383 h_state[i].sh2_tbl = params[i].sh2;
384 h_state[i].mask = params[0].mask;
385 for (int j = 0; j < MTGP_TS; j++) {
386 h_state[i].param_tbl[j] = params[i].tbl[j];
387 h_state[i].temper_tbl[j] = params[i].tmp_tbl[j];
388 h_state[i].single_temper_tbl[j] = params[i].flt_tmp_tbl[j];
389 }
390 }
391
392 const hipError_t error
393 = hipMemcpy(state, h_state, sizeof(rocrand_state_mtgp32) * n, hipMemcpyDefault);
394 free(h_state);
395
396 if(error != hipSuccess)
398
400}
401
418__host__
419inline rocrand_status rocrand_make_constant(const mtgp32_fast_params params[], mtgp32_params* p)
420{
421 const int block_num = MTGP_BN_MAX;
422 const int size1 = sizeof(uint32_t) * block_num;
423 const int size2 = sizeof(uint32_t) * block_num * MTGP_TS;
424 uint32_t *h_pos_tbl;
425 uint32_t *h_sh1_tbl;
426 uint32_t *h_sh2_tbl;
427 uint32_t *h_param_tbl;
428 uint32_t *h_temper_tbl;
429 uint32_t *h_single_temper_tbl;
430 uint32_t *h_mask;
431 h_pos_tbl = (uint32_t *)malloc(size1);
432 h_sh1_tbl = (uint32_t *)malloc(size1);
433 h_sh2_tbl = (uint32_t *)malloc(size1);
434 h_param_tbl = (uint32_t *)malloc(size2);
435 h_temper_tbl = (uint32_t *)malloc(size2);
436 h_single_temper_tbl = (uint32_t *)malloc(size2);
437 h_mask = (uint32_t *)malloc(sizeof(uint32_t));
439
440 if (h_pos_tbl == NULL || h_sh1_tbl == NULL || h_sh2_tbl == NULL
441 || h_param_tbl == NULL || h_temper_tbl == NULL || h_single_temper_tbl == NULL
442 || h_mask == NULL) {
443 printf("failure in allocating host memory for constant table.\n");
445 }
446 else {
447 h_mask[0] = params[0].mask;
448 for (int i = 0; i < block_num; i++) {
449 h_pos_tbl[i] = params[i].pos;
450 h_sh1_tbl[i] = params[i].sh1;
451 h_sh2_tbl[i] = params[i].sh2;
452 for (int j = 0; j < MTGP_TS; j++) {
453 h_param_tbl[i * MTGP_TS + j] = params[i].tbl[j];
454 h_temper_tbl[i * MTGP_TS + j] = params[i].tmp_tbl[j];
455 h_single_temper_tbl[i * MTGP_TS + j] = params[i].flt_tmp_tbl[j];
456 }
457 }
458
459 if (hipMemcpy(p->pos_tbl, h_pos_tbl, size1, hipMemcpyHostToDevice) != hipSuccess)
461 if (hipMemcpy(p->sh1_tbl, h_sh1_tbl, size1, hipMemcpyHostToDevice) != hipSuccess)
463 if (hipMemcpy(p->sh2_tbl, h_sh2_tbl, size1, hipMemcpyHostToDevice) != hipSuccess)
465 if (hipMemcpy(p->param_tbl, h_param_tbl, size2, hipMemcpyHostToDevice) != hipSuccess)
467 if (hipMemcpy(p->temper_tbl, h_temper_tbl, size2, hipMemcpyHostToDevice) != hipSuccess)
469 if (hipMemcpy(p->single_temper_tbl, h_single_temper_tbl, size2, hipMemcpyHostToDevice) != hipSuccess)
471 if (hipMemcpy(p->mask, h_mask, sizeof(unsigned int), hipMemcpyHostToDevice) != hipSuccess)
473 }
474
475 free(h_pos_tbl);
476 free(h_sh1_tbl);
477 free(h_sh2_tbl);
478 free(h_param_tbl);
479 free(h_temper_tbl);
480 free(h_single_temper_tbl);
481 free(h_mask);
482
483 return status;
484}
485
498__forceinline__ __device__
499unsigned int rocrand(rocrand_state_mtgp32* state)
500{
501 return state->next();
502}
503
535__forceinline__ __device__
536void rocrand_mtgp32_block_copy(rocrand_state_mtgp32* src, rocrand_state_mtgp32* dest)
537{
538 dest->copy(src);
539}
540
547__forceinline__ __device__
548void rocrand_mtgp32_set_params(rocrand_state_mtgp32* state, mtgp32_params* params)
549{
550 state->set_params(params);
551}
552 // end of group rocranddevice
554
555#endif // ROCRAND_MTGP32_H_
__forceinline__ __device__ void rocrand_mtgp32_block_copy(rocrand_state_mtgp32 *src, rocrand_state_mtgp32 *dest)
Copies MTGP32 state to another state using block of threads.
Definition rocrand_mtgp32.h:536
__host__ rocrand_status rocrand_make_state_mtgp32(rocrand_state_mtgp32 *state, mtgp32_fast_params params[], int n, unsigned long long seed)
Initializes MTGP32 states.
Definition rocrand_mtgp32.h:365
__forceinline__ __device__ unsigned int rocrand(rocrand_state_mtgp32 *state)
Returns uniformly distributed random unsigned int value from [0; 2^32 - 1] range.
Definition rocrand_mtgp32.h:499
__host__ rocrand_status rocrand_make_constant(const mtgp32_fast_params params[], mtgp32_params *p)
Loads parameters for MTGP32.
Definition rocrand_mtgp32.h:419
__forceinline__ __device__ void rocrand_mtgp32_set_params(rocrand_state_mtgp32 *state, mtgp32_params *params)
Changes parameters of a MTGP32 state.
Definition rocrand_mtgp32.h:548
rocrand_status
rocRAND function call status type
Definition rocrand.h:61
@ ROCRAND_STATUS_SUCCESS
No errors.
Definition rocrand.h:62
@ ROCRAND_STATUS_ALLOCATION_FAILED
Memory allocation failed during execution.
Definition rocrand.h:65
Definition rocrand_mtgp32.h:97
uint32_t tmp_tbl[16]
Definition rocrand_mtgp32.h:103
int pos
Definition rocrand_mtgp32.h:99
int mexp
Definition rocrand_mtgp32.h:98
int sh2
Definition rocrand_mtgp32.h:101
int sh1
Definition rocrand_mtgp32.h:100
uint32_t mask
Definition rocrand_mtgp32.h:105
unsigned char poly_sha1[21]
Definition rocrand_mtgp32.h:106
uint32_t tbl[16]
Definition rocrand_mtgp32.h:102
uint32_t flt_tmp_tbl[16]
Definition rocrand_mtgp32.h:104