001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.rng.core.source32; 019 020import org.apache.commons.rng.ArbitrarilyJumpableUniformRandomProvider; 021import org.apache.commons.rng.JumpableUniformRandomProvider; 022import org.apache.commons.rng.LongJumpableUniformRandomProvider; 023import org.apache.commons.rng.UniformRandomProvider; 024import org.apache.commons.rng.core.util.NumberFactory; 025 026import java.util.Arrays; 027import java.util.stream.Stream; 028 029/** 030 * This class implements the Philox4x32 128-bit counter-based generator with 10 rounds. 031 * 032 * <p>This is a member of the Philox family of generators. Memory footprint is 192 bits 033 * and the period is 2<sup>130</sup>.</p> 034 * 035 * <p>Jumping in the sequence is essentially instantaneous. 036 * This generator provides both subsequences and arbitrary jumps for easy parallelization. 037 * 038 * <p>References: 039 * <ol> 040 * <li> 041 * Salmon, J.K. <i>et al</i> (2011) 042 * <a href="https://dl.acm.org/doi/epdf/10.1145/2063384.2063405"> 043 * Parallel Random Numbers: As Easy as 1,2,3</a>.</li> 044 * </ol> 045 * 046 * @since 1.7 047 */ 048public final class Philox4x32 extends IntProvider implements LongJumpableUniformRandomProvider, 049 ArbitrarilyJumpableUniformRandomProvider { 050 /** Philox 32-bit mixing constant for counter 0. */ 051 private static final int K_PHILOX_10_A = 0x9E3779B9; 052 /** Philox 32-bit mixing constant for counter 1. */ 053 private static final int K_PHILOX_10_B = 0xBB67AE85; 054 /** Philox 32-bit constant for key 0. */ 055 private static final int K_PHILOX_SA = 0xD2511F53; 056 /** Philox 32-bit constant for key 1. */ 057 private static final int K_PHILOX_SB = 0xCD9E8D57; 058 /** Internal buffer size. */ 059 private static final int PHILOX_BUFFER_SIZE = 4; 060 /** Number of state variables. */ 061 private static final int STATE_SIZE = 7; 062 /** The base-2 logarithm of the period. */ 063 private static final int LOG_PERIOD = 130; 064 /** The period of 2^130 as a double. */ 065 private static final double PERIOD = 0x1.0p130; 066 /** 2^54. Threshold for a double that cannot have the 2 least 067 * significant bits set when converted to a long. */ 068 private static final double TWO_POW_54 = 0x1.0p54; 069 070 /** Counter 0. */ 071 private int counter0; 072 /** Counter 1. */ 073 private int counter1; 074 /** Counter 2. */ 075 private int counter2; 076 /** Counter 3. */ 077 private int counter3; 078 /** Output buffer. */ 079 private final int[] buffer = new int[PHILOX_BUFFER_SIZE]; 080 /** Key low bits. */ 081 private int key0; 082 /** Key high bits. */ 083 private int key1; 084 /** Output buffer index. When at the end of the buffer the counter is 085 * incremented and the buffer regenerated. */ 086 private int bufferPosition; 087 088 /** 089 * Creates a new instance based on an array of int containing, key (first two ints) and 090 * the counter (next 4 ints, low bits = first int). The counter is not scrambled and may 091 * be used to create contiguous blocks with size a multiple of 4 ints. For example, 092 * setting seed[2] = 1 is equivalent to start with seed[2]=0 and calling {@link #next()} 4 times. 093 * 094 * @param seed Array of size 6 defining key0,key1,counter0,counter1,counter2,counter3. 095 * If the size is smaller, zero values are assumed. 096 */ 097 public Philox4x32(int[] seed) { 098 final int[] input = seed.length < 6 ? Arrays.copyOf(seed, 6) : seed; 099 setState(input); 100 bufferPosition = PHILOX_BUFFER_SIZE; 101 } 102 103 /** 104 * Copy constructor. 105 * 106 * @param source Source to copy. 107 */ 108 private Philox4x32(Philox4x32 source) { 109 super(source); 110 counter0 = source.counter0; 111 counter1 = source.counter1; 112 counter2 = source.counter2; 113 counter3 = source.counter3; 114 key0 = source.key0; 115 key1 = source.key1; 116 bufferPosition = source.bufferPosition; 117 System.arraycopy(source.buffer, 0, buffer, 0, PHILOX_BUFFER_SIZE); 118 } 119 120 /** 121 * Copies the state from the array into the generator state. 122 * 123 * @param state New state. 124 */ 125 private void setState(int[] state) { 126 key0 = state[0]; 127 key1 = state[1]; 128 counter0 = state[2]; 129 counter1 = state[3]; 130 counter2 = state[4]; 131 counter3 = state[5]; 132 } 133 134 /** {@inheritDoc} */ 135 @Override 136 protected byte[] getStateInternal() { 137 return composeStateInternal( 138 NumberFactory.makeByteArray(new int[] { 139 key0, key1, 140 counter0, counter1, counter2, counter3, 141 bufferPosition}), 142 super.getStateInternal()); 143 } 144 145 /** {@inheritDoc} */ 146 @Override 147 protected void setStateInternal(byte[] s) { 148 final byte[][] c = splitStateInternal(s, STATE_SIZE * Integer.BYTES); 149 final int[] state = NumberFactory.makeIntArray(c[0]); 150 setState(state); 151 bufferPosition = state[6]; 152 super.setStateInternal(c[1]); 153 // Regenerate the internal buffer 154 rand10(); 155 } 156 157 /** {@inheritDoc} */ 158 @Override 159 public int next() { 160 final int p = bufferPosition; 161 if (p < PHILOX_BUFFER_SIZE) { 162 bufferPosition = p + 1; 163 return buffer[p]; 164 } 165 incrementCounter(); 166 rand10(); 167 bufferPosition = 1; 168 return buffer[0]; 169 } 170 171 /** 172 * Increment the counter by one. 173 */ 174 private void incrementCounter() { 175 counter0++; 176 if (counter0 != 0) { 177 return; 178 } 179 counter1++; 180 if (counter1 != 0) { 181 return; 182 } 183 counter2++; 184 if (counter2 != 0) { 185 return; 186 } 187 counter3++; 188 } 189 190 /** 191 * Perform 10 rounds, using counter0, counter1, counter2, counter3 as starting point. 192 * It updates the buffer member variable, but no others. 193 */ 194 private void rand10() { 195 buffer[0] = counter0; 196 buffer[1] = counter1; 197 buffer[2] = counter2; 198 buffer[3] = counter3; 199 200 int k0 = key0; 201 int k1 = key1; 202 203 //unrolled loop for performance 204 singleRound(buffer, k0, k1); 205 k0 += K_PHILOX_10_A; 206 k1 += K_PHILOX_10_B; 207 singleRound(buffer, k0, k1); 208 k0 += K_PHILOX_10_A; 209 k1 += K_PHILOX_10_B; 210 singleRound(buffer, k0, k1); 211 k0 += K_PHILOX_10_A; 212 k1 += K_PHILOX_10_B; 213 singleRound(buffer, k0, k1); 214 k0 += K_PHILOX_10_A; 215 k1 += K_PHILOX_10_B; 216 singleRound(buffer, k0, k1); 217 k0 += K_PHILOX_10_A; 218 k1 += K_PHILOX_10_B; 219 singleRound(buffer, k0, k1); 220 k0 += K_PHILOX_10_A; 221 k1 += K_PHILOX_10_B; 222 singleRound(buffer, k0, k1); 223 k0 += K_PHILOX_10_A; 224 k1 += K_PHILOX_10_B; 225 singleRound(buffer, k0, k1); 226 k0 += K_PHILOX_10_A; 227 k1 += K_PHILOX_10_B; 228 singleRound(buffer, k0, k1); 229 k0 += K_PHILOX_10_A; 230 k1 += K_PHILOX_10_B; 231 singleRound(buffer, k0, k1); 232 } 233 234 /** 235 * Performs a single round of philox. 236 * 237 * @param counter Counter, which will be updated after each call. 238 * @param key0 Key low bits. 239 * @param key1 Key high bits. 240 */ 241 private static void singleRound(int[] counter, int key0, int key1) { 242 final long product0 = (K_PHILOX_SA & 0xffff_ffffL) * (counter[0] & 0xffff_ffffL); 243 final int hi0 = (int) (product0 >>> 32); 244 final int lo0 = (int) product0; 245 final long product1 = (K_PHILOX_SB & 0xffff_ffffL) * (counter[2] & 0xffff_ffffL); 246 final int hi1 = (int) (product1 >>> 32); 247 final int lo1 = (int) product1; 248 249 counter[0] = hi1 ^ counter[1] ^ key0; 250 counter[1] = lo1; 251 counter[2] = hi0 ^ counter[3] ^ key1; 252 counter[3] = lo0; 253 } 254 255 /** 256 * {@inheritDoc} 257 * 258 * <p>The jump size is the equivalent of 2<sup>66</sup> 259 * calls to {@link UniformRandomProvider#nextInt() nextInt()}. It can provide 260 * up to 2<sup>64</sup> non-overlapping subsequences.</p> 261 */ 262 @Override 263 public UniformRandomProvider jump() { 264 final Philox4x32 copy = new Philox4x32(this); 265 if (++counter2 == 0) { 266 counter3++; 267 } 268 finishJump(); 269 return copy; 270 } 271 272 /** 273 * {@inheritDoc} 274 * 275 * <p>The jump size is the equivalent of 2<sup>98</sup> calls to 276 * {@link UniformRandomProvider#nextLong() nextLong()}. It can provide up to 277 * 2<sup>32</sup> non-overlapping subsequences of length 2<sup>98</sup>; each 278 * subsequence can provide up to 2<sup>32</sup> non-overlapping subsequences of 279 * length 2<sup>66</sup> using the {@link #jump()} method.</p> 280 */ 281 @Override 282 public JumpableUniformRandomProvider longJump() { 283 final Philox4x32 copy = new Philox4x32(this); 284 counter3++; 285 finishJump(); 286 return copy; 287 } 288 289 /** {@inheritDoc} */ 290 @Override 291 public ArbitrarilyJumpableUniformRandomProvider jump(double distance) { 292 IntJumpDistances.validateJump(distance, PERIOD); 293 // Decompose into an increment for the buffer position and counter 294 final int skip = getBufferPositionIncrement(distance); 295 final int[] increment = getCounterIncrement(distance); 296 return copyAndJump(skip, increment); 297 } 298 299 /** {@inheritDoc} */ 300 @Override 301 public ArbitrarilyJumpableUniformRandomProvider jumpPowerOfTwo(int logDistance) { 302 IntJumpDistances.validateJumpPowerOfTwo(logDistance, LOG_PERIOD); 303 // For simplicity this re-uses code to increment the buffer position and counter 304 // when only one or the other is required for a power of 2. 305 // In practice the jump should be much larger than 1 and the necessary regeneration 306 // of the buffer is the most time consuming step. 307 int skip = 0; 308 final int[] increment = new int[PHILOX_BUFFER_SIZE]; 309 if (logDistance >= 0) { 310 if (logDistance <= 1) { 311 // The first 2 powers update the buffer position. 312 skip = 1 << logDistance; 313 } else { 314 // Remaining powers update the 128-bit counter 315 final int n = logDistance - 2; 316 // Create the increment. 317 // Start at n / 32 with a 1-bit shifted n % 32 318 increment[n >> 5] = 1 << (n & 0x1f); 319 } 320 } 321 return copyAndJump(skip, increment); 322 } 323 324 /** {@inheritDoc} */ 325 @Override 326 public Stream<ArbitrarilyJumpableUniformRandomProvider> jumps(double distance) { 327 IntJumpDistances.validateJump(distance, PERIOD); 328 // Decompose into an increment for the buffer position and counter 329 final int skip = getBufferPositionIncrement(distance); 330 final int[] increment = getCounterIncrement(distance); 331 return Stream.generate(() -> copyAndJump(skip, increment)).sequential(); 332 } 333 334 /** 335 * Gets the buffer position increment from the jump distance. 336 * 337 * @param distance Jump distance. 338 * @return the buffer position increment 339 */ 340 private static int getBufferPositionIncrement(double distance) { 341 return distance < TWO_POW_54 ? 342 // 2 least significant digits from the integer representation 343 (int)((long) distance) & 0x3 : 344 0; 345 } 346 347 /** 348 * Gets the counter increment from the jump distance. 349 * 350 * @param distance Jump distance. 351 * @return the counter increment 352 */ 353 private static int[] getCounterIncrement(double distance) { 354 final int[] increment = new int[PHILOX_BUFFER_SIZE]; 355 // The counter is incremented if the distance is above the buffer size 356 // (increment = distance / 4). 357 if (distance >= PHILOX_BUFFER_SIZE) { 358 IntJumpDistances.writeUnsignedInteger(distance * 0.25, increment); 359 } 360 return increment; 361 } 362 363 /** 364 * Copy the generator and advance the internal state. The copy is returned. 365 * 366 * <p>This method: (1) assumes that the arguments have been validated; 367 * and (2) regenerates the output buffer if required. 368 * 369 * @param skip Amount to skip the buffer position in [0, 3]. 370 * @param increment Unsigned 128-bit increment, least significant bits first. 371 * @return the copy 372 */ 373 private ArbitrarilyJumpableUniformRandomProvider copyAndJump(int skip, int[] increment) { 374 final Philox4x32 copy = new Philox4x32(this); 375 376 // Skip the buffer position forward. 377 // Assumes position is in [0, 4] and skip is less than 4. 378 // Handle rollover but allow position=4 to regenerate buffer on next output call. 379 bufferPosition += skip; 380 if (bufferPosition > PHILOX_BUFFER_SIZE) { 381 bufferPosition -= PHILOX_BUFFER_SIZE; 382 incrementCounter(); 383 } 384 385 // Increment the 128-bit counter. 386 // Addition using unsigned int as longs. 387 // Any overflow bit is carried to the next counter. 388 // Unrolled branchless loop for performance. 389 long r; 390 r = (counter0 & 0xffff_ffffL) + (increment[0] & 0xffff_ffffL); 391 counter0 = (int) r; 392 r = (counter1 & 0xffff_ffffL) + (increment[1] & 0xffff_ffffL) + (r >>> 32); 393 counter1 = (int) r; 394 r = (counter2 & 0xffff_ffffL) + (increment[2] & 0xffff_ffffL) + (r >>> 32); 395 counter2 = (int) r; 396 r = (counter3 & 0xffff_ffffL) + (increment[3] & 0xffff_ffffL) + (r >>> 32); 397 counter3 = (int) r; 398 399 finishJump(); 400 return copy; 401 } 402 403 /** 404 * Finish the jump of this generator. Resets the cached state and regenerates 405 * the output buffer if required. 406 */ 407 private void finishJump() { 408 resetCachedState(); 409 // Regenerate the internal buffer only if the buffer position is 410 // within the output buffer. Otherwise regeneration is delayed until 411 // next output. This allows more efficient consecutive jumping when 412 // the buffer is due to be regenerated. 413 if (bufferPosition < PHILOX_BUFFER_SIZE) { 414 rand10(); 415 } 416 } 417}