001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.rng.core.source32;
019
020import org.apache.commons.rng.ArbitrarilyJumpableUniformRandomProvider;
021import org.apache.commons.rng.JumpableUniformRandomProvider;
022import org.apache.commons.rng.LongJumpableUniformRandomProvider;
023import org.apache.commons.rng.UniformRandomProvider;
024import org.apache.commons.rng.core.util.NumberFactory;
025
026import java.util.Arrays;
027import java.util.stream.Stream;
028
029/**
030 * This class implements the Philox4x32 128-bit counter-based generator with 10 rounds.
031 *
032 * <p>This is a member of the Philox family of generators. Memory footprint is 192 bits
033 * and the period is 2<sup>130</sup>.</p>
034 *
035 * <p>Jumping in the sequence is essentially instantaneous.
036 * This generator provides both subsequences and arbitrary jumps for easy parallelization.
037 *
038 * <p>References:
039 * <ol>
040 * <li>
041 * Salmon, J.K. <i>et al</i> (2011)
042 * <a href="https://dl.acm.org/doi/epdf/10.1145/2063384.2063405">
043 * Parallel Random Numbers: As Easy as 1,2,3</a>.</li>
044 * </ol>
045 *
046 * @since 1.7
047 */
048public final class Philox4x32 extends IntProvider implements LongJumpableUniformRandomProvider,
049        ArbitrarilyJumpableUniformRandomProvider {
050    /** Philox 32-bit mixing constant for counter 0. */
051    private static final int K_PHILOX_10_A = 0x9E3779B9;
052    /** Philox 32-bit mixing constant for counter 1. */
053    private static final int K_PHILOX_10_B = 0xBB67AE85;
054    /** Philox 32-bit constant for key 0. */
055    private static final int K_PHILOX_SA = 0xD2511F53;
056    /** Philox 32-bit constant for key 1. */
057    private static final int K_PHILOX_SB = 0xCD9E8D57;
058    /** Internal buffer size. */
059    private static final int PHILOX_BUFFER_SIZE = 4;
060    /** Number of state variables. */
061    private static final int STATE_SIZE = 7;
062    /** The base-2 logarithm of the period. */
063    private static final int LOG_PERIOD = 130;
064    /** The period of 2^130 as a double. */
065    private static final double PERIOD = 0x1.0p130;
066    /** 2^54. Threshold for a double that cannot have the 2 least
067     * significant bits set when converted to a long. */
068    private static final double TWO_POW_54 = 0x1.0p54;
069
070    /** Counter 0. */
071    private int counter0;
072    /** Counter 1. */
073    private int counter1;
074    /** Counter 2. */
075    private int counter2;
076    /** Counter 3. */
077    private int counter3;
078    /** Output buffer. */
079    private final int[] buffer = new int[PHILOX_BUFFER_SIZE];
080    /** Key low bits. */
081    private int key0;
082    /** Key high bits. */
083    private int key1;
084    /** Output buffer index. When at the end of the buffer the counter is
085     * incremented and the buffer regenerated. */
086    private int bufferPosition;
087
088    /**
089     * Creates a new instance based on an array of int containing, key (first two ints) and
090     * the counter (next 4 ints, low bits = first int). The counter is not scrambled and may
091     * be used to create contiguous blocks with size a multiple of 4 ints. For example,
092     * setting seed[2] = 1 is equivalent to start with seed[2]=0 and calling {@link #next()} 4 times.
093     *
094     * @param seed Array of size 6 defining key0,key1,counter0,counter1,counter2,counter3.
095     *             If the size is smaller, zero values are assumed.
096     */
097    public Philox4x32(int[] seed) {
098        final int[] input = seed.length < 6 ? Arrays.copyOf(seed, 6) : seed;
099        setState(input);
100        bufferPosition = PHILOX_BUFFER_SIZE;
101    }
102
103    /**
104     * Copy constructor.
105     *
106     * @param source Source to copy.
107     */
108    private Philox4x32(Philox4x32 source) {
109        super(source);
110        counter0 = source.counter0;
111        counter1 = source.counter1;
112        counter2 = source.counter2;
113        counter3 = source.counter3;
114        key0 = source.key0;
115        key1 = source.key1;
116        bufferPosition = source.bufferPosition;
117        System.arraycopy(source.buffer, 0, buffer, 0, PHILOX_BUFFER_SIZE);
118    }
119
120    /**
121     * Copies the state from the array into the generator state.
122     *
123     * @param state New state.
124     */
125    private void setState(int[] state) {
126        key0 = state[0];
127        key1 = state[1];
128        counter0 = state[2];
129        counter1 = state[3];
130        counter2 = state[4];
131        counter3 = state[5];
132    }
133
134    /** {@inheritDoc} */
135    @Override
136    protected byte[] getStateInternal() {
137        return composeStateInternal(
138            NumberFactory.makeByteArray(new int[] {
139                key0, key1,
140                counter0, counter1, counter2, counter3,
141                bufferPosition}),
142            super.getStateInternal());
143    }
144
145    /** {@inheritDoc} */
146    @Override
147    protected void setStateInternal(byte[] s) {
148        final byte[][] c = splitStateInternal(s, STATE_SIZE * Integer.BYTES);
149        final int[] state = NumberFactory.makeIntArray(c[0]);
150        setState(state);
151        bufferPosition = state[6];
152        super.setStateInternal(c[1]);
153        // Regenerate the internal buffer
154        rand10();
155    }
156
157    /** {@inheritDoc} */
158    @Override
159    public int next() {
160        final int p = bufferPosition;
161        if (p < PHILOX_BUFFER_SIZE) {
162            bufferPosition = p + 1;
163            return buffer[p];
164        }
165        incrementCounter();
166        rand10();
167        bufferPosition = 1;
168        return buffer[0];
169    }
170
171    /**
172     * Increment the counter by one.
173     */
174    private void incrementCounter() {
175        counter0++;
176        if (counter0 != 0) {
177            return;
178        }
179        counter1++;
180        if (counter1 != 0) {
181            return;
182        }
183        counter2++;
184        if (counter2 != 0) {
185            return;
186        }
187        counter3++;
188    }
189
190    /**
191     * Perform 10 rounds, using counter0, counter1, counter2, counter3 as starting point.
192     * It updates the buffer member variable, but no others.
193     */
194    private void rand10() {
195        buffer[0] = counter0;
196        buffer[1] = counter1;
197        buffer[2] = counter2;
198        buffer[3] = counter3;
199
200        int k0 = key0;
201        int k1 = key1;
202
203        //unrolled loop for performance
204        singleRound(buffer, k0, k1);
205        k0 += K_PHILOX_10_A;
206        k1 += K_PHILOX_10_B;
207        singleRound(buffer, k0, k1);
208        k0 += K_PHILOX_10_A;
209        k1 += K_PHILOX_10_B;
210        singleRound(buffer, k0, k1);
211        k0 += K_PHILOX_10_A;
212        k1 += K_PHILOX_10_B;
213        singleRound(buffer, k0, k1);
214        k0 += K_PHILOX_10_A;
215        k1 += K_PHILOX_10_B;
216        singleRound(buffer, k0, k1);
217        k0 += K_PHILOX_10_A;
218        k1 += K_PHILOX_10_B;
219        singleRound(buffer, k0, k1);
220        k0 += K_PHILOX_10_A;
221        k1 += K_PHILOX_10_B;
222        singleRound(buffer, k0, k1);
223        k0 += K_PHILOX_10_A;
224        k1 += K_PHILOX_10_B;
225        singleRound(buffer, k0, k1);
226        k0 += K_PHILOX_10_A;
227        k1 += K_PHILOX_10_B;
228        singleRound(buffer, k0, k1);
229        k0 += K_PHILOX_10_A;
230        k1 += K_PHILOX_10_B;
231        singleRound(buffer, k0, k1);
232    }
233
234    /**
235     * Performs a single round of philox.
236     *
237     * @param counter Counter, which will be updated after each call.
238     * @param key0 Key low bits.
239     * @param key1 Key high bits.
240     */
241    private static void singleRound(int[] counter, int key0, int key1) {
242        final long product0 = (K_PHILOX_SA & 0xffff_ffffL) * (counter[0] & 0xffff_ffffL);
243        final int hi0 = (int) (product0 >>> 32);
244        final int lo0 = (int) product0;
245        final long product1 = (K_PHILOX_SB & 0xffff_ffffL) * (counter[2] & 0xffff_ffffL);
246        final int hi1 = (int) (product1 >>> 32);
247        final int lo1 = (int) product1;
248
249        counter[0] = hi1 ^ counter[1] ^ key0;
250        counter[1] = lo1;
251        counter[2] = hi0 ^ counter[3] ^ key1;
252        counter[3] = lo0;
253    }
254
255    /**
256     * {@inheritDoc}
257     *
258     * <p>The jump size is the equivalent of 2<sup>66</sup>
259     * calls to {@link UniformRandomProvider#nextInt() nextInt()}. It can provide
260     * up to 2<sup>64</sup> non-overlapping subsequences.</p>
261     */
262    @Override
263    public UniformRandomProvider jump() {
264        final Philox4x32 copy = new Philox4x32(this);
265        if (++counter2 == 0) {
266            counter3++;
267        }
268        finishJump();
269        return copy;
270    }
271
272    /**
273     * {@inheritDoc}
274     *
275     * <p>The jump size is the equivalent of 2<sup>98</sup> calls to
276     * {@link UniformRandomProvider#nextLong() nextLong()}. It can provide up to
277     * 2<sup>32</sup> non-overlapping subsequences of length 2<sup>98</sup>; each
278     * subsequence can provide up to 2<sup>32</sup> non-overlapping subsequences of
279     * length 2<sup>66</sup> using the {@link #jump()} method.</p>
280     */
281    @Override
282    public JumpableUniformRandomProvider longJump() {
283        final Philox4x32 copy = new Philox4x32(this);
284        counter3++;
285        finishJump();
286        return copy;
287    }
288
289    /** {@inheritDoc} */
290    @Override
291    public ArbitrarilyJumpableUniformRandomProvider jump(double distance) {
292        IntJumpDistances.validateJump(distance, PERIOD);
293        // Decompose into an increment for the buffer position and counter
294        final int skip = getBufferPositionIncrement(distance);
295        final int[] increment = getCounterIncrement(distance);
296        return copyAndJump(skip, increment);
297    }
298
299    /** {@inheritDoc} */
300    @Override
301    public ArbitrarilyJumpableUniformRandomProvider jumpPowerOfTwo(int logDistance) {
302        IntJumpDistances.validateJumpPowerOfTwo(logDistance, LOG_PERIOD);
303        // For simplicity this re-uses code to increment the buffer position and counter
304        // when only one or the other is required for a power of 2.
305        // In practice the jump should be much larger than 1 and the necessary regeneration
306        // of the buffer is the most time consuming step.
307        int skip = 0;
308        final int[] increment = new int[PHILOX_BUFFER_SIZE];
309        if (logDistance >= 0) {
310            if (logDistance <= 1) {
311                // The first 2 powers update the buffer position.
312                skip = 1 << logDistance;
313            } else {
314                // Remaining powers update the 128-bit counter
315                final int n = logDistance - 2;
316                // Create the increment.
317                // Start at n / 32 with a 1-bit shifted n % 32
318                increment[n >> 5] = 1 << (n & 0x1f);
319            }
320        }
321        return copyAndJump(skip, increment);
322    }
323
324    /** {@inheritDoc} */
325    @Override
326    public Stream<ArbitrarilyJumpableUniformRandomProvider> jumps(double distance) {
327        IntJumpDistances.validateJump(distance, PERIOD);
328        // Decompose into an increment for the buffer position and counter
329        final int skip = getBufferPositionIncrement(distance);
330        final int[] increment = getCounterIncrement(distance);
331        return Stream.generate(() -> copyAndJump(skip, increment)).sequential();
332    }
333
334    /**
335     * Gets the buffer position increment from the jump distance.
336     *
337     * @param distance Jump distance.
338     * @return the buffer position increment
339     */
340    private static int getBufferPositionIncrement(double distance) {
341        return distance < TWO_POW_54 ?
342            // 2 least significant digits from the integer representation
343            (int)((long) distance) & 0x3 :
344            0;
345    }
346
347    /**
348     * Gets the counter increment from the jump distance.
349     *
350     * @param distance Jump distance.
351     * @return the counter increment
352     */
353    private static int[] getCounterIncrement(double distance) {
354        final int[] increment = new int[PHILOX_BUFFER_SIZE];
355        // The counter is incremented if the distance is above the buffer size
356        // (increment = distance / 4).
357        if (distance >= PHILOX_BUFFER_SIZE) {
358            IntJumpDistances.writeUnsignedInteger(distance * 0.25, increment);
359        }
360        return increment;
361    }
362
363    /**
364     * Copy the generator and advance the internal state. The copy is returned.
365     *
366     * <p>This method: (1) assumes that the arguments have been validated;
367     * and (2) regenerates the output buffer if required.
368     *
369     * @param skip Amount to skip the buffer position in [0, 3].
370     * @param increment Unsigned 128-bit increment, least significant bits first.
371     * @return the copy
372     */
373    private ArbitrarilyJumpableUniformRandomProvider copyAndJump(int skip, int[] increment) {
374        final Philox4x32 copy = new Philox4x32(this);
375
376        // Skip the buffer position forward.
377        // Assumes position is in [0, 4] and skip is less than 4.
378        // Handle rollover but allow position=4 to regenerate buffer on next output call.
379        bufferPosition += skip;
380        if (bufferPosition > PHILOX_BUFFER_SIZE) {
381            bufferPosition -= PHILOX_BUFFER_SIZE;
382            incrementCounter();
383        }
384
385        // Increment the 128-bit counter.
386        // Addition using unsigned int as longs.
387        // Any overflow bit is carried to the next counter.
388        // Unrolled branchless loop for performance.
389        long r;
390        r = (counter0 & 0xffff_ffffL) + (increment[0] & 0xffff_ffffL);
391        counter0 = (int) r;
392        r = (counter1 & 0xffff_ffffL) + (increment[1] & 0xffff_ffffL) + (r >>> 32);
393        counter1 = (int) r;
394        r = (counter2 & 0xffff_ffffL) + (increment[2] & 0xffff_ffffL) + (r >>> 32);
395        counter2 = (int) r;
396        r = (counter3 & 0xffff_ffffL) + (increment[3] & 0xffff_ffffL) + (r >>> 32);
397        counter3 = (int) r;
398
399        finishJump();
400        return copy;
401    }
402
403    /**
404     * Finish the jump of this generator. Resets the cached state and regenerates
405     * the output buffer if required.
406     */
407    private void finishJump() {
408        resetCachedState();
409        // Regenerate the internal buffer only if the buffer position is
410        // within the output buffer. Otherwise regeneration is delayed until
411        // next output. This allows more efficient consecutive jumping when
412        // the buffer is due to be regenerated.
413        if (bufferPosition < PHILOX_BUFFER_SIZE) {
414            rand10();
415        }
416    }
417}