001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.rng.core.source64; 019 020import java.util.Arrays; 021import java.util.stream.Stream; 022import org.apache.commons.rng.ArbitrarilyJumpableUniformRandomProvider; 023import org.apache.commons.rng.JumpableUniformRandomProvider; 024import org.apache.commons.rng.LongJumpableUniformRandomProvider; 025import org.apache.commons.rng.UniformRandomProvider; 026import org.apache.commons.rng.core.util.NumberFactory; 027 028/** 029 * This class implements the Philox4x64 256-bit counter-based generator with 10 rounds. 030 * 031 * <p>This is a member of the Philox family of generators. Memory footprint is 384 bits 032 * and the period is 2<sup>258</sup>.</p> 033 * 034 * <p>Jumping in the sequence is essentially instantaneous. 035 * This generator provides both subsequences and arbitrary jumps for easy parallelization. 036 * 037 * <p>References: 038 * <ol> 039 * <li> 040 * Salmon, J.K. <i>et al</i> (2011) 041 * <a href="https://dl.acm.org/doi/epdf/10.1145/2063384.2063405"> 042 * Parallel Random Numbers: As Easy as 1,2,3</a>.</li> 043 * </ol> 044 * 045 * @since 1.7 046 */ 047public final class Philox4x64 extends LongProvider implements LongJumpableUniformRandomProvider, 048 ArbitrarilyJumpableUniformRandomProvider { 049 /** Philox 64-bit mixing constant for counter 0. */ 050 private static final long PHILOX_M0 = 0xD2E7470EE14C6C93L; 051 /** Philox 64-bit mixing constant for counter 1. */ 052 private static final long PHILOX_M1 = 0xCA5A826395121157L; 053 /** Philox 64-bit constant for key 0. */ 054 private static final long PHILOX_W0 = 0x9E3779B97F4A7C15L; 055 /** Philox 64-bit constant for key 1. */ 056 private static final long PHILOX_W1 = 0xBB67AE8584CAA73BL; 057 /** Internal buffer size. */ 058 private static final int PHILOX_BUFFER_SIZE = 4; 059 /** Number of state variables. */ 060 private static final int STATE_SIZE = 7; 061 /** The base-2 logarithm of the period. */ 062 private static final int LOG_PERIOD = 258; 063 /** The period of 2^258 as a double. */ 064 private static final double PERIOD = 0x1.0p258; 065 /** 2^54. Threshold for a double that cannot have the 2 least 066 * significant bits set when converted to a long. */ 067 private static final double TWO_POW_54 = 0x1.0p54; 068 069 /** Counter 0. */ 070 private long counter0; 071 /** Counter 1. */ 072 private long counter1; 073 /** Counter 2. */ 074 private long counter2; 075 /** Counter 3. */ 076 private long counter3; 077 /** Output buffer. */ 078 private final long[] buffer = new long[PHILOX_BUFFER_SIZE]; 079 /** Key low bits. */ 080 private long key0; 081 /** Key high bits. */ 082 private long key1; 083 /** Output buffer index. When at the end of the buffer the counter is 084 * incremented and the buffer regenerated. */ 085 private int bufferPosition; 086 087 /** 088 * Creates a new instance given 6 long numbers containing, key (first two longs) and 089 * the counter (next 4 longs, low bits = first long). The counter is not scrambled and may 090 * be used to create contiguous blocks with size a multiple of 4 longs. For example, 091 * setting seed[2] = 1 is equivalent to start with seed[2]=0 and calling {@link #next()} 4 times. 092 * 093 * @param seed Array of size 6 defining key0,key1,counter0,counter1,counter2,counter3. 094 * If the size is smaller, zero values are assumed. 095 */ 096 public Philox4x64(long[] seed) { 097 final long[] input = seed.length < 6 ? Arrays.copyOf(seed, 6) : seed; 098 setState(input); 099 bufferPosition = PHILOX_BUFFER_SIZE; 100 } 101 102 /** 103 * Copy constructor. 104 * 105 * @param source Source to copy. 106 */ 107 private Philox4x64(Philox4x64 source) { 108 super(source); 109 counter0 = source.counter0; 110 counter1 = source.counter1; 111 counter2 = source.counter2; 112 counter3 = source.counter3; 113 key0 = source.key0; 114 key1 = source.key1; 115 bufferPosition = source.bufferPosition; 116 System.arraycopy(source.buffer, 0, buffer, 0, PHILOX_BUFFER_SIZE); 117 } 118 119 /** 120 * Copies the state from the array into the generator state. 121 * 122 * @param state New state. 123 */ 124 private void setState(long[] state) { 125 key0 = state[0]; 126 key1 = state[1]; 127 counter0 = state[2]; 128 counter1 = state[3]; 129 counter2 = state[4]; 130 counter3 = state[5]; 131 } 132 133 /** {@inheritDoc} */ 134 @Override 135 protected byte[] getStateInternal() { 136 return composeStateInternal( 137 NumberFactory.makeByteArray(new long[] { 138 key0, key1, 139 counter0, counter1, counter2, counter3, 140 bufferPosition}), 141 super.getStateInternal()); 142 } 143 144 /** {@inheritDoc} */ 145 @Override 146 protected void setStateInternal(byte[] s) { 147 final byte[][] c = splitStateInternal(s, STATE_SIZE * Long.BYTES); 148 final long[] state = NumberFactory.makeLongArray(c[0]); 149 setState(state); 150 bufferPosition = (int) state[6]; 151 super.setStateInternal(c[1]); 152 // Regenerate the internal buffer 153 rand10(); 154 } 155 156 /** {@inheritDoc} */ 157 @Override 158 public long next() { 159 final int p = bufferPosition; 160 if (bufferPosition < PHILOX_BUFFER_SIZE) { 161 bufferPosition = p + 1; 162 return buffer[p]; 163 } 164 incrementCounter(); 165 rand10(); 166 bufferPosition = 1; 167 return buffer[0]; 168 } 169 170 /** 171 * Increment the counter by one. 172 */ 173 private void incrementCounter() { 174 counter0++; 175 if (counter0 != 0) { 176 return; 177 } 178 counter1++; 179 if (counter1 != 0) { 180 return; 181 } 182 counter2++; 183 if (counter2 != 0) { 184 return; 185 } 186 counter3++; 187 } 188 189 /** 190 * Perform 10 rounds, using counter0, counter1, counter2, counter3 as starting point. 191 * It updates the buffer member variable, but no others. 192 */ 193 private void rand10() { 194 buffer[0] = counter0; 195 buffer[1] = counter1; 196 buffer[2] = counter2; 197 buffer[3] = counter3; 198 199 long k0 = key0; 200 long k1 = key1; 201 202 //unrolled loop for performance 203 singleRound(buffer, k0, k1); 204 k0 += PHILOX_W0; 205 k1 += PHILOX_W1; 206 singleRound(buffer, k0, k1); 207 k0 += PHILOX_W0; 208 k1 += PHILOX_W1; 209 singleRound(buffer, k0, k1); 210 k0 += PHILOX_W0; 211 k1 += PHILOX_W1; 212 singleRound(buffer, k0, k1); 213 k0 += PHILOX_W0; 214 k1 += PHILOX_W1; 215 singleRound(buffer, k0, k1); 216 k0 += PHILOX_W0; 217 k1 += PHILOX_W1; 218 singleRound(buffer, k0, k1); 219 k0 += PHILOX_W0; 220 k1 += PHILOX_W1; 221 singleRound(buffer, k0, k1); 222 k0 += PHILOX_W0; 223 k1 += PHILOX_W1; 224 singleRound(buffer, k0, k1); 225 k0 += PHILOX_W0; 226 k1 += PHILOX_W1; 227 singleRound(buffer, k0, k1); 228 k0 += PHILOX_W0; 229 k1 += PHILOX_W1; 230 singleRound(buffer, k0, k1); 231 } 232 233 /** 234 * Performs a single round of philox. 235 * 236 * @param counter Counter, which will be updated after each call. 237 * @param key0 Key low bits. 238 * @param key1 Key high bits. 239 */ 240 private static void singleRound(long[] counter, long key0, long key1) { 241 final long lo0 = PHILOX_M0 * counter[0]; 242 final long hi0 = PhiloxSupport.unsignedMultiplyHigh(PHILOX_M0, counter[0]); 243 final long lo1 = PHILOX_M1 * counter[2]; 244 final long hi1 = PhiloxSupport.unsignedMultiplyHigh(PHILOX_M1, counter[2]); 245 246 counter[0] = hi1 ^ counter[1] ^ key0; 247 counter[1] = lo1; 248 counter[2] = hi0 ^ counter[3] ^ key1; 249 counter[3] = lo0; 250 } 251 252 /** 253 * {@inheritDoc} 254 * 255 * <p>The jump size is the equivalent of 2<sup>130</sup> 256 * calls to {@link UniformRandomProvider#nextLong() nextLong()}. It can provide 257 * up to 2<sup>128</sup> non-overlapping subsequences.</p> 258 */ 259 @Override 260 public UniformRandomProvider jump() { 261 final Philox4x64 copy = new Philox4x64(this); 262 if (++counter2 == 0) { 263 counter3++; 264 } 265 finishJump(); 266 return copy; 267 } 268 269 /** 270 * {@inheritDoc} 271 * 272 * <p>The jump size is the equivalent of 2<sup>194</sup> calls to 273 * {@link UniformRandomProvider#nextLong() nextLong()}. It can provide up to 274 * 2<sup>64</sup> non-overlapping subsequences of length 2<sup>194</sup>; each 275 * subsequence can provide up to 2<sup>64</sup> non-overlapping subsequences of 276 * length 2<sup>130</sup> using the {@link #jump()} method.</p> 277 */ 278 @Override 279 public JumpableUniformRandomProvider longJump() { 280 final Philox4x64 copy = new Philox4x64(this); 281 counter3++; 282 finishJump(); 283 return copy; 284 } 285 286 @Override 287 public ArbitrarilyJumpableUniformRandomProvider jump(double distance) { 288 LongJumpDistances.validateJump(distance, PERIOD); 289 // Decompose into an increment for the buffer position and counter 290 final int skip = getBufferPositionIncrement(distance); 291 final long[] increment = getCounterIncrement(distance); 292 return copyAndJump(skip, increment); 293 } 294 295 @Override 296 public ArbitrarilyJumpableUniformRandomProvider jumpPowerOfTwo(int logDistance) { 297 LongJumpDistances.validateJumpPowerOfTwo(logDistance, LOG_PERIOD); 298 // For simplicity this re-uses code to increment the buffer position and counter 299 // when only one or the other is required for a power of 2. 300 // In practice the jump should be much larger than 1 and the necessary regeneration 301 // of the buffer is the most time consuming step. 302 int skip = 0; 303 final long[] increment = new long[PHILOX_BUFFER_SIZE]; 304 if (logDistance >= 0) { 305 if (logDistance <= 1) { 306 // The first 2 powers update the buffer position. 307 skip = 1 << logDistance; 308 } else { 309 // Remaining powers update the 256-bit counter 310 final int n = logDistance - 2; 311 // Create the increment. 312 // Start at n / 64 with a 1-bit shifted n % 64 313 increment[n >> 6] = 1L << (n & 0x3f); 314 } 315 } 316 return copyAndJump(skip, increment); 317 } 318 319 /** {@inheritDoc} */ 320 @Override 321 public Stream<ArbitrarilyJumpableUniformRandomProvider> jumps(double distance) { 322 LongJumpDistances.validateJump(distance, PERIOD); 323 // Decompose into an increment for the buffer position and counter 324 final int skip = getBufferPositionIncrement(distance); 325 final long[] increment = getCounterIncrement(distance); 326 return Stream.generate(() -> copyAndJump(skip, increment)).sequential(); 327 } 328 329 /** 330 * Gets the buffer position increment from the jump distance. 331 * 332 * @param distance Jump distance. 333 * @return the buffer position increment 334 */ 335 private static int getBufferPositionIncrement(double distance) { 336 return distance < TWO_POW_54 ? 337 // 2 least significant digits from the integer representation 338 (int)((long) distance) & 0x3 : 339 0; 340 } 341 342 /** 343 * Gets the counter increment from the jump distance. 344 * 345 * @param distance Jump distance. 346 * @return the counter increment 347 */ 348 private static long[] getCounterIncrement(double distance) { 349 final long[] increment = new long[PHILOX_BUFFER_SIZE]; 350 // The counter is incremented if the distance is above the buffer size 351 // (increment = distance / 4). 352 if (distance >= PHILOX_BUFFER_SIZE) { 353 LongJumpDistances.writeUnsignedInteger(distance * 0.25, increment); 354 } 355 return increment; 356 } 357 358 /** 359 * Copy the generator and advance the internal state. The copy is returned. 360 * 361 * <p>This method: (1) assumes that the arguments have been validated; 362 * and (2) regenerates the output buffer if required. 363 * 364 * @param skip Amount to skip the buffer position in [0, 3]. 365 * @param increment Unsigned 256-bit increment, least significant bits first. 366 * @return the copy 367 */ 368 private ArbitrarilyJumpableUniformRandomProvider copyAndJump(int skip, long[] increment) { 369 final Philox4x64 copy = new Philox4x64(this); 370 371 // Skip the buffer position forward. 372 // Assumes position is in [0, 4] and skip is less than 4. 373 // Handle rollover but allow position=4 to regenerate buffer on next output call. 374 bufferPosition += skip; 375 if (bufferPosition > PHILOX_BUFFER_SIZE) { 376 bufferPosition -= PHILOX_BUFFER_SIZE; 377 incrementCounter(); 378 } 379 380 // Increment the 256-bit counter. 381 // Addition using unsigned int as longs. 382 // Any overflow bit is carried to the next counter. 383 // Unrolled branchless loop for performance. 384 long r; 385 long s; 386 r = (counter0 & 0xffff_ffffL) + (increment[0] & 0xffff_ffffL); 387 s = (counter0 >>> 32) + (increment[0] >>> 32) + (r >>> 32); 388 counter0 = (r & 0xffff_ffffL) | (s << 32); 389 390 r = (counter1 & 0xffff_ffffL) + (increment[1] & 0xffff_ffffL) + (s >>> 32); 391 s = (counter1 >>> 32) + (increment[1] >>> 32) + (r >>> 32); 392 counter1 = (r & 0xffff_ffffL) | (s << 32); 393 394 r = (counter2 & 0xffff_ffffL) + (increment[2] & 0xffff_ffffL) + (s >>> 32); 395 s = (counter2 >>> 32) + (increment[2] >>> 32) + (r >>> 32); 396 counter2 = (r & 0xffff_ffffL) | (s << 32); 397 398 r = (counter3 & 0xffff_ffffL) + (increment[3] & 0xffff_ffffL) + (s >>> 32); 399 s = (counter3 >>> 32) + (increment[3] >>> 32) + (r >>> 32); 400 counter3 = (r & 0xffff_ffffL) | (s << 32); 401 402 finishJump(); 403 return copy; 404 } 405 406 /** 407 * Finish the jump of this generator. Resets the cached state and regenerates 408 * the output buffer if required. 409 */ 410 private void finishJump() { 411 resetCachedState(); 412 // Regenerate the internal buffer only if the buffer position is 413 // within the output buffer. Otherwise regeneration is delayed until 414 // next output. This allows more efficient consecutive jumping when 415 // the buffer is due to be regenerated. 416 if (bufferPosition < PHILOX_BUFFER_SIZE) { 417 rand10(); 418 } 419 } 420}