001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.rng.core.source64;
019
020import java.util.Arrays;
021import java.util.stream.Stream;
022import org.apache.commons.rng.ArbitrarilyJumpableUniformRandomProvider;
023import org.apache.commons.rng.JumpableUniformRandomProvider;
024import org.apache.commons.rng.LongJumpableUniformRandomProvider;
025import org.apache.commons.rng.UniformRandomProvider;
026import org.apache.commons.rng.core.util.NumberFactory;
027
028/**
029 * This class implements the Philox4x64 256-bit counter-based generator with 10 rounds.
030 *
031 * <p>This is a member of the Philox family of generators. Memory footprint is 384 bits
032 * and the period is 2<sup>258</sup>.</p>
033 *
034 * <p>Jumping in the sequence is essentially instantaneous.
035 * This generator provides both subsequences and arbitrary jumps for easy parallelization.
036 *
037 * <p>References:
038 * <ol>
039 * <li>
040 * Salmon, J.K. <i>et al</i> (2011)
041 * <a href="https://dl.acm.org/doi/epdf/10.1145/2063384.2063405">
042 * Parallel Random Numbers: As Easy as 1,2,3</a>.</li>
043 * </ol>
044 *
045 * @since 1.7
046 */
047public final class Philox4x64 extends LongProvider implements LongJumpableUniformRandomProvider,
048        ArbitrarilyJumpableUniformRandomProvider {
049    /** Philox 64-bit mixing constant for counter 0. */
050    private static final long PHILOX_M0 = 0xD2E7470EE14C6C93L;
051    /** Philox 64-bit mixing constant for counter 1. */
052    private static final long PHILOX_M1 = 0xCA5A826395121157L;
053    /** Philox 64-bit constant for key 0. */
054    private static final long PHILOX_W0 = 0x9E3779B97F4A7C15L;
055    /** Philox 64-bit constant for key 1. */
056    private static final long PHILOX_W1 = 0xBB67AE8584CAA73BL;
057    /** Internal buffer size. */
058    private static final int PHILOX_BUFFER_SIZE = 4;
059    /** Number of state variables. */
060    private static final int STATE_SIZE = 7;
061    /** The base-2 logarithm of the period. */
062    private static final int LOG_PERIOD = 258;
063    /** The period of 2^258 as a double. */
064    private static final double PERIOD = 0x1.0p258;
065    /** 2^54. Threshold for a double that cannot have the 2 least
066     * significant bits set when converted to a long. */
067    private static final double TWO_POW_54 = 0x1.0p54;
068
069    /** Counter 0. */
070    private long counter0;
071    /** Counter 1. */
072    private long counter1;
073    /** Counter 2. */
074    private long counter2;
075    /** Counter 3. */
076    private long counter3;
077    /** Output buffer. */
078    private final long[] buffer = new long[PHILOX_BUFFER_SIZE];
079    /** Key low bits. */
080    private long key0;
081    /** Key high bits. */
082    private long key1;
083    /** Output buffer index. When at the end of the buffer the counter is
084     * incremented and the buffer regenerated. */
085    private int bufferPosition;
086
087    /**
088     * Creates a new instance given 6 long numbers containing, key (first two longs) and
089     * the counter (next 4 longs, low bits = first long). The counter is not scrambled and may
090     * be used to create contiguous blocks with size a multiple of 4 longs. For example,
091     * setting seed[2] = 1 is equivalent to start with seed[2]=0 and calling {@link #next()} 4 times.
092     *
093     * @param seed Array of size 6 defining key0,key1,counter0,counter1,counter2,counter3.
094     *             If the size is smaller, zero values are assumed.
095     */
096    public Philox4x64(long[] seed) {
097        final long[] input = seed.length < 6 ? Arrays.copyOf(seed, 6) : seed;
098        setState(input);
099        bufferPosition = PHILOX_BUFFER_SIZE;
100    }
101
102    /**
103     * Copy constructor.
104     *
105     * @param source Source to copy.
106     */
107    private Philox4x64(Philox4x64 source) {
108        super(source);
109        counter0 = source.counter0;
110        counter1 = source.counter1;
111        counter2 = source.counter2;
112        counter3 = source.counter3;
113        key0 = source.key0;
114        key1 = source.key1;
115        bufferPosition = source.bufferPosition;
116        System.arraycopy(source.buffer, 0, buffer, 0, PHILOX_BUFFER_SIZE);
117    }
118
119    /**
120     * Copies the state from the array into the generator state.
121     *
122     * @param state New state.
123     */
124    private void setState(long[] state) {
125        key0 = state[0];
126        key1 = state[1];
127        counter0 = state[2];
128        counter1 = state[3];
129        counter2 = state[4];
130        counter3 = state[5];
131    }
132
133    /** {@inheritDoc} */
134    @Override
135    protected byte[] getStateInternal() {
136        return composeStateInternal(
137            NumberFactory.makeByteArray(new long[] {
138                key0, key1,
139                counter0, counter1, counter2, counter3,
140                bufferPosition}),
141            super.getStateInternal());
142    }
143
144    /** {@inheritDoc} */
145    @Override
146    protected void setStateInternal(byte[] s) {
147        final byte[][] c = splitStateInternal(s, STATE_SIZE * Long.BYTES);
148        final long[] state = NumberFactory.makeLongArray(c[0]);
149        setState(state);
150        bufferPosition = (int) state[6];
151        super.setStateInternal(c[1]);
152        // Regenerate the internal buffer
153        rand10();
154    }
155
156    /** {@inheritDoc} */
157    @Override
158    public long next() {
159        final int p = bufferPosition;
160        if (bufferPosition < PHILOX_BUFFER_SIZE) {
161            bufferPosition = p + 1;
162            return buffer[p];
163        }
164        incrementCounter();
165        rand10();
166        bufferPosition = 1;
167        return buffer[0];
168    }
169
170    /**
171     * Increment the counter by one.
172     */
173    private void incrementCounter() {
174        counter0++;
175        if (counter0 != 0) {
176            return;
177        }
178        counter1++;
179        if (counter1 != 0) {
180            return;
181        }
182        counter2++;
183        if (counter2 != 0) {
184            return;
185        }
186        counter3++;
187    }
188
189    /**
190     * Perform 10 rounds, using counter0, counter1, counter2, counter3 as starting point.
191     * It updates the buffer member variable, but no others.
192     */
193    private void rand10() {
194        buffer[0] = counter0;
195        buffer[1] = counter1;
196        buffer[2] = counter2;
197        buffer[3] = counter3;
198
199        long k0 = key0;
200        long k1 = key1;
201
202        //unrolled loop for performance
203        singleRound(buffer, k0, k1);
204        k0 += PHILOX_W0;
205        k1 += PHILOX_W1;
206        singleRound(buffer, k0, k1);
207        k0 += PHILOX_W0;
208        k1 += PHILOX_W1;
209        singleRound(buffer, k0, k1);
210        k0 += PHILOX_W0;
211        k1 += PHILOX_W1;
212        singleRound(buffer, k0, k1);
213        k0 += PHILOX_W0;
214        k1 += PHILOX_W1;
215        singleRound(buffer, k0, k1);
216        k0 += PHILOX_W0;
217        k1 += PHILOX_W1;
218        singleRound(buffer, k0, k1);
219        k0 += PHILOX_W0;
220        k1 += PHILOX_W1;
221        singleRound(buffer, k0, k1);
222        k0 += PHILOX_W0;
223        k1 += PHILOX_W1;
224        singleRound(buffer, k0, k1);
225        k0 += PHILOX_W0;
226        k1 += PHILOX_W1;
227        singleRound(buffer, k0, k1);
228        k0 += PHILOX_W0;
229        k1 += PHILOX_W1;
230        singleRound(buffer, k0, k1);
231    }
232
233    /**
234     * Performs a single round of philox.
235     *
236     * @param counter Counter, which will be updated after each call.
237     * @param key0 Key low bits.
238     * @param key1 Key high bits.
239     */
240    private static void singleRound(long[] counter, long key0, long key1) {
241        final long lo0 = PHILOX_M0 * counter[0];
242        final long hi0 = PhiloxSupport.unsignedMultiplyHigh(PHILOX_M0, counter[0]);
243        final long lo1 = PHILOX_M1 * counter[2];
244        final long hi1 = PhiloxSupport.unsignedMultiplyHigh(PHILOX_M1, counter[2]);
245
246        counter[0] = hi1 ^ counter[1] ^ key0;
247        counter[1] = lo1;
248        counter[2] = hi0 ^ counter[3] ^ key1;
249        counter[3] = lo0;
250    }
251
252    /**
253     * {@inheritDoc}
254     *
255     * <p>The jump size is the equivalent of 2<sup>130</sup>
256     * calls to {@link UniformRandomProvider#nextLong() nextLong()}. It can provide
257     * up to 2<sup>128</sup> non-overlapping subsequences.</p>
258     */
259    @Override
260    public UniformRandomProvider jump() {
261        final Philox4x64 copy = new Philox4x64(this);
262        if (++counter2 == 0) {
263            counter3++;
264        }
265        finishJump();
266        return copy;
267    }
268
269    /**
270     * {@inheritDoc}
271     *
272     * <p>The jump size is the equivalent of 2<sup>194</sup> calls to
273     * {@link UniformRandomProvider#nextLong() nextLong()}. It can provide up to
274     * 2<sup>64</sup> non-overlapping subsequences of length 2<sup>194</sup>; each
275     * subsequence can provide up to 2<sup>64</sup> non-overlapping subsequences of
276     * length 2<sup>130</sup> using the {@link #jump()} method.</p>
277     */
278    @Override
279    public JumpableUniformRandomProvider longJump() {
280        final Philox4x64 copy = new Philox4x64(this);
281        counter3++;
282        finishJump();
283        return copy;
284    }
285
286    @Override
287    public ArbitrarilyJumpableUniformRandomProvider jump(double distance) {
288        LongJumpDistances.validateJump(distance, PERIOD);
289        // Decompose into an increment for the buffer position and counter
290        final int skip = getBufferPositionIncrement(distance);
291        final long[] increment = getCounterIncrement(distance);
292        return copyAndJump(skip, increment);
293    }
294
295    @Override
296    public ArbitrarilyJumpableUniformRandomProvider jumpPowerOfTwo(int logDistance) {
297        LongJumpDistances.validateJumpPowerOfTwo(logDistance, LOG_PERIOD);
298        // For simplicity this re-uses code to increment the buffer position and counter
299        // when only one or the other is required for a power of 2.
300        // In practice the jump should be much larger than 1 and the necessary regeneration
301        // of the buffer is the most time consuming step.
302        int skip = 0;
303        final long[] increment = new long[PHILOX_BUFFER_SIZE];
304        if (logDistance >= 0) {
305            if (logDistance <= 1) {
306                // The first 2 powers update the buffer position.
307                skip = 1 << logDistance;
308            } else {
309                // Remaining powers update the 256-bit counter
310                final int n = logDistance - 2;
311                // Create the increment.
312                // Start at n / 64 with a 1-bit shifted n % 64
313                increment[n >> 6] = 1L << (n & 0x3f);
314            }
315        }
316        return copyAndJump(skip, increment);
317    }
318
319    /** {@inheritDoc} */
320    @Override
321    public Stream<ArbitrarilyJumpableUniformRandomProvider> jumps(double distance) {
322        LongJumpDistances.validateJump(distance, PERIOD);
323        // Decompose into an increment for the buffer position and counter
324        final int skip = getBufferPositionIncrement(distance);
325        final long[] increment = getCounterIncrement(distance);
326        return Stream.generate(() -> copyAndJump(skip, increment)).sequential();
327    }
328
329    /**
330     * Gets the buffer position increment from the jump distance.
331     *
332     * @param distance Jump distance.
333     * @return the buffer position increment
334     */
335    private static int getBufferPositionIncrement(double distance) {
336        return distance < TWO_POW_54 ?
337            // 2 least significant digits from the integer representation
338            (int)((long) distance) & 0x3 :
339            0;
340    }
341
342    /**
343     * Gets the counter increment from the jump distance.
344     *
345     * @param distance Jump distance.
346     * @return the counter increment
347     */
348    private static long[] getCounterIncrement(double distance) {
349        final long[] increment = new long[PHILOX_BUFFER_SIZE];
350        // The counter is incremented if the distance is above the buffer size
351        // (increment = distance / 4).
352        if (distance >= PHILOX_BUFFER_SIZE) {
353            LongJumpDistances.writeUnsignedInteger(distance * 0.25, increment);
354        }
355        return increment;
356    }
357
358    /**
359     * Copy the generator and advance the internal state. The copy is returned.
360     *
361     * <p>This method: (1) assumes that the arguments have been validated;
362     * and (2) regenerates the output buffer if required.
363     *
364     * @param skip Amount to skip the buffer position in [0, 3].
365     * @param increment Unsigned 256-bit increment, least significant bits first.
366     * @return the copy
367     */
368    private ArbitrarilyJumpableUniformRandomProvider copyAndJump(int skip, long[] increment) {
369        final Philox4x64 copy = new Philox4x64(this);
370
371        // Skip the buffer position forward.
372        // Assumes position is in [0, 4] and skip is less than 4.
373        // Handle rollover but allow position=4 to regenerate buffer on next output call.
374        bufferPosition += skip;
375        if (bufferPosition > PHILOX_BUFFER_SIZE) {
376            bufferPosition -= PHILOX_BUFFER_SIZE;
377            incrementCounter();
378        }
379
380        // Increment the 256-bit counter.
381        // Addition using unsigned int as longs.
382        // Any overflow bit is carried to the next counter.
383        // Unrolled branchless loop for performance.
384        long r;
385        long s;
386        r = (counter0 & 0xffff_ffffL) + (increment[0] & 0xffff_ffffL);
387        s = (counter0 >>> 32) + (increment[0] >>> 32) + (r >>> 32);
388        counter0 = (r & 0xffff_ffffL) | (s << 32);
389
390        r = (counter1 & 0xffff_ffffL) + (increment[1] & 0xffff_ffffL) + (s >>> 32);
391        s = (counter1 >>> 32) + (increment[1] >>> 32) + (r >>> 32);
392        counter1 = (r & 0xffff_ffffL) | (s << 32);
393
394        r = (counter2 & 0xffff_ffffL) + (increment[2] & 0xffff_ffffL) + (s >>> 32);
395        s = (counter2 >>> 32) + (increment[2] >>> 32) + (r >>> 32);
396        counter2 = (r & 0xffff_ffffL) | (s << 32);
397
398        r = (counter3 & 0xffff_ffffL) + (increment[3] & 0xffff_ffffL) + (s >>> 32);
399        s = (counter3 >>> 32) + (increment[3] >>> 32) + (r >>> 32);
400        counter3 = (r & 0xffff_ffffL) | (s << 32);
401
402        finishJump();
403        return copy;
404    }
405
406    /**
407     * Finish the jump of this generator. Resets the cached state and regenerates
408     * the output buffer if required.
409     */
410    private void finishJump() {
411        resetCachedState();
412        // Regenerate the internal buffer only if the buffer position is
413        // within the output buffer. Otherwise regeneration is delayed until
414        // next output. This allows more efficient consecutive jumping when
415        // the buffer is due to be regenerated.
416        if (bufferPosition < PHILOX_BUFFER_SIZE) {
417            rand10();
418        }
419    }
420}