/build/rocrand-7S8maf/rocrand-7.1.1/library/include/rocrand/rocrand_lfsr113.h Source File

/build/rocrand-7S8maf/rocrand-7.1.1/library/include/rocrand/rocrand_lfsr113.h Source File#

API library: /build/rocrand-7S8maf/rocrand-7.1.1/library/include/rocrand/rocrand_lfsr113.h Source File
rocrand_lfsr113.h
1// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21#ifndef ROCRAND_LFSR113_H_
22#define ROCRAND_LFSR113_H_
23
24#include "rocrand/rocrand_lfsr113_precomputed.h"
25
26#include <hip/hip_runtime.h>
27
/// \def ROCRAND_LFSR113_DEFAULT_SEED_X
/// \brief Default X seed for LFSR113 PRNG.
#define ROCRAND_LFSR113_DEFAULT_SEED_X 2

/// \def ROCRAND_LFSR113_DEFAULT_SEED_Y
/// \brief Default Y seed for LFSR113 PRNG.
#define ROCRAND_LFSR113_DEFAULT_SEED_Y 8

/// \def ROCRAND_LFSR113_DEFAULT_SEED_Z
/// \brief Default Z seed for LFSR113 PRNG.
#define ROCRAND_LFSR113_DEFAULT_SEED_Z 16

/// \def ROCRAND_LFSR113_DEFAULT_SEED_W
/// \brief Default W seed for LFSR113 PRNG.
#define ROCRAND_LFSR113_DEFAULT_SEED_W 128
// end of group rocranddevice
45
46namespace rocrand_device
47{
48namespace detail
49{
50
51__forceinline__ __device__ __host__ void mul_mat_vec_inplace(const unsigned int* m, uint4* z)
52{
53 unsigned int v[4] = {z->x, z->y, z->z, z->w};
54 unsigned int r[LFSR113_N] = {0};
55 for(int ij = 0; ij < LFSR113_N * LFSR113_M; ij++)
56 {
57 const int i = ij / LFSR113_M;
58 const int j = ij % LFSR113_M;
59 const unsigned int b = (v[i] & (1U << j)) ? 0xffffffff : 0x0;
60 for(int k = 0; k < LFSR113_N; k++)
61 {
62 r[k] ^= b & m[i * LFSR113_M * LFSR113_N + j * LFSR113_N + k];
63 }
64 }
65 // Copy result into z
66 z->x = r[0];
67 z->y = r[1];
68 z->z = r[2];
69 z->w = r[3];
70}
71} // end namespace detail
72
73class lfsr113_engine
74{
75public:
76 struct lfsr113_state
77 {
78 uint4 z;
79 uint4 subsequence;
80 };
81
87 __forceinline__ __device__ __host__ lfsr113_engine(const uint4 seed
92 const unsigned int subsequence = 0,
93 const unsigned long long offset = 0)
94 {
95 this->seed(seed, subsequence, offset);
96 }
97
103 __forceinline__ __device__ __host__ void seed(uint4 seed_value,
104 const unsigned long long subsequence,
105 const unsigned long long offset = 0)
106 {
107 m_state.subsequence = seed_value;
108
109 reset_start_subsequence();
110 discard_subsequence(subsequence);
111 discard(offset);
112 }
113
115 __forceinline__ __device__ __host__ void discard()
116 {
117 discard_state();
118 }
119
121 __forceinline__ __device__ __host__ void discard(unsigned long long offset)
122 {
123#ifdef __HIP_DEVICE_COMPILE__
124 jump(offset, d_lfsr113_jump_matrices);
125#else
126 jump(offset, h_lfsr113_jump_matrices);
127#endif
128 }
129
132 __forceinline__ __device__ __host__ void discard_subsequence(unsigned int subsequence)
133 {
134// Discard n * 2^55 samples
135#ifdef __HIP_DEVICE_COMPILE__
136 jump(subsequence, d_lfsr113_sequence_jump_matrices);
137#else
138 jump(subsequence, h_lfsr113_sequence_jump_matrices);
139#endif
140 }
141
142 __forceinline__ __device__ __host__ unsigned int operator()()
143 {
144 return next();
145 }
146
147 __forceinline__ __device__ __host__ unsigned int next()
148 {
149 unsigned int b;
150
151 b = (((m_state.z.x << 6) ^ m_state.z.x) >> 13);
152 m_state.z.x = (((m_state.z.x & 4294967294U) << 18) ^ b);
153
154 b = (((m_state.z.y << 2) ^ m_state.z.y) >> 27);
155 m_state.z.y = (((m_state.z.y & 4294967288U) << 2) ^ b);
156
157 b = (((m_state.z.z << 13) ^ m_state.z.z) >> 21);
158 m_state.z.z = (((m_state.z.z & 4294967280U) << 7) ^ b);
159
160 b = (((m_state.z.w << 3) ^ m_state.z.w) >> 12);
161 m_state.z.w = (((m_state.z.w & 4294967168U) << 13) ^ b);
162
163 return (m_state.z.x ^ m_state.z.y ^ m_state.z.z ^ m_state.z.w);
164 }
165
166protected:
168 __forceinline__ __device__ __host__ void reset_start_subsequence()
169 {
170 m_state.z.x = m_state.subsequence.x;
171 m_state.z.y = m_state.subsequence.y;
172 m_state.z.z = m_state.subsequence.z;
173 m_state.z.w = m_state.subsequence.w;
174 }
175
176 // Advances the internal state to the next state.
177 __forceinline__ __device__ __host__ void discard_state()
178 {
179 this->next();
180 }
181
182 __forceinline__ __device__ __host__ void
183 jump(unsigned long long v,
184 const unsigned int (&jump_matrices)[LFSR113_JUMP_MATRICES][LFSR113_SIZE])
185 {
186 // x~(n + v) = (A^v mod m)x~n mod m
187 // The matrix (A^v mod m) can be precomputed for selected values of v.
188 //
189 // For LFSR113_JUMP_LOG2 = 2
190 // lfsr113_jump_matrices contains precomputed matrices:
191 // A^1, A^4, A^16...
192 //
193 // For LFSR113_JUMP_LOG2 = 2 and LFSR113_SEQUENCE_JUMP_LOG2 = 55
194 // lfsr113_sequence_jump_matrices contains precomputed matrices:
195 // A^(1 * 2^55), A^(4 * 2^55), A^(16 * 2^55)...
196 //
197 // Intermediate powers can be calculated as multiplication of the powers above.
198
199 unsigned int mi = 0;
200 while(v > 0)
201 {
202 const unsigned int is = static_cast<unsigned int>(v) & ((1 << LFSR113_JUMP_LOG2) - 1);
203 for(unsigned int i = 0; i < is; i++)
204 {
205 detail::mul_mat_vec_inplace(jump_matrices[mi], &m_state.z);
206 }
207 mi++;
208 v >>= LFSR113_JUMP_LOG2;
209 }
210 }
211
212protected:
213 lfsr113_state m_state;
214
215}; // lfsr113_engine class
216
217} // end namespace rocrand_device
218
223
225typedef rocrand_device::lfsr113_engine rocrand_state_lfsr113;
227
238__forceinline__ __device__ __host__
239void rocrand_init(const uint4 seed, const unsigned int subsequence, rocrand_state_lfsr113* state)
240{
241 *state = rocrand_state_lfsr113(seed, subsequence);
242}
243
255__forceinline__ __device__ __host__
256void rocrand_init(const uint4 seed,
257 const unsigned int subsequence,
258 const unsigned long long offset,
259 rocrand_state_lfsr113* state)
260{
261 *state = rocrand_state_lfsr113(seed, subsequence, offset);
262}
263
276__forceinline__ __device__ __host__
277unsigned int rocrand(rocrand_state_lfsr113* state)
278{
279 return state->next();
280}
281
290__forceinline__ __device__ __host__
291void skipahead(unsigned long long offset, rocrand_state_lfsr113* state)
292{
293 return state->discard(offset);
294}
295
305__forceinline__ __device__ __host__
306void skipahead_subsequence(unsigned int subsequence, rocrand_state_lfsr113* state)
307{
308 return state->discard_subsequence(subsequence);
309}
310
320__forceinline__ __device__ __host__
321void skipahead_sequence(unsigned int sequence, rocrand_state_lfsr113* state)
322{
323 return state->discard_subsequence(sequence);
324}
325 // end of group rocranddevice
327
328#endif // ROCRAND_LFSR113_H_
__forceinline__ __device__ __host__ void rocrand_init(const uint4 seed, const unsigned int subsequence, rocrand_state_lfsr113 *state)
Initializes LFSR113 state.
Definition rocrand_lfsr113.h:239
__forceinline__ __device__ __host__ void skipahead(unsigned long long offset, rocrand_state_lfsr113 *state)
Updates the LFSR113 state to skip ahead by `offset` elements.
Definition rocrand_lfsr113.h:291
#define ROCRAND_LFSR113_DEFAULT_SEED_Y
Default Y seed for LFSR113 PRNG.
Definition rocrand_lfsr113.h:37
__forceinline__ __device__ __host__ void skipahead_sequence(unsigned int sequence, rocrand_state_lfsr113 *state)
Updates the LFSR113 state to skip ahead by `sequence` sequences.
Definition rocrand_lfsr113.h:321
#define ROCRAND_LFSR113_DEFAULT_SEED_W
Default W seed for LFSR113 PRNG.
Definition rocrand_lfsr113.h:43
__forceinline__ __device__ __host__ void skipahead_subsequence(unsigned int subsequence, rocrand_state_lfsr113 *state)
Updates the LFSR113 state to skip ahead by `subsequence` subsequences.
Definition rocrand_lfsr113.h:306
#define ROCRAND_LFSR113_DEFAULT_SEED_Z
Default Z seed for LFSR113 PRNG.
Definition rocrand_lfsr113.h:40
#define ROCRAND_LFSR113_DEFAULT_SEED_X
Default X seed for LFSR113 PRNG.
Definition rocrand_lfsr113.h:34
__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_lfsr113 *state)
Returns a uniformly distributed random unsigned int value in the range [0, 2^32 - 1].
Definition rocrand_lfsr113.h:277