Sunday, March 01, 2015

Random number generator PCG32 translated to Python.

There's a new family of pseudo-random number generators called PCG going around with a great Video introducing the problem that it solves:


The site has a pcg32 baby implementation in C that I chose to convert to plain python to learn more about the algorithm.

The pcg32 C code is written to be quite lean-n-mean but Python doesn't do fast integer bit operations - its a higher level language, integers automatically extend to however bits are necessary for computing a result without over/underflow. I therefore needed to use 32 and 64 bit bit-masks to constrain results to the uint32 and uint64 types of the C code.

The C struct pcg_state_setseq_64 is mutable and passed between functions. I chose to model this with a two element Python list - the closest I could think of to a lightweight mutable namedtuple (as there is no such thing in the standard library).

The code is commented with the C-code it is derived from:

# -*- coding: utf-8 -*-
"""
Created on Thu Feb 26 22:58:33 2015
 
@author: Paddy3118
 
 
PCG32 translated from the C code:
  http://www.pcg-random.org/download.html#minimal-c-implementation
  Version pcg-c-basic-0.9
 
"""
 
from sys import getsizeof
 
 
_MASK32 = 2**32-1   # uint32_t
_MASK64 = 2**64-1   # uint64_t
 
 
def state_setseq_64(state=0, inc=0):
    """
    struct pcg_state_setseq_64 {    // Internals are *Private*.
        uint64_t state;             // RNG state.  All values are possible.
        uint64_t inc;               // Controls which RNG sequence (stream) is
                                    // selected. Must *always* be odd.
    };
    """
    return [state & _MASK64, inc & _MASK64]
# typedef struct pcg_state_setseq_64 pcg32_random_t;
random_t = state_setseq_64
 
# #define PCG32_INITIALIZER   { 0x853c49e6748fea9bULL, 0xda3e39cb94b95bdbULL }
# static pcg32_random_t pcg32_global = PCG32_INITIALIZER;
_GLOBAL = random_t(0x853c49e6748fea9b, 0xda3e39cb94b95bdb)
 
 
def srandom_r(rng, initstate, initseq):
    """
    void pcg32_srandom_r(pcg32_random_t* rng, uint64_t initstate, uint64_t initseq)
    {
        rng->state = 0U;
        rng->inc = (initseq << 1u) | 1u;
        pcg32_random_r(rng);
        rng->state += initstate;
        pcg32_random_r(rng);
    }
    """
    rng[::] = [0, (initseq << 1) & _MASK64 | 1]
    random_r(rng)
    rng[0] += initstate
    rng[0] &= _MASK64
    random_r(rng)
 
 
def srandom(seed, seq):
    """
    void pcg32_srandom(uint64_t seed, uint64_t seq)
    {
        pcg32_srandom_r(&pcg32_global, seed, seq);
    }
    """
    srandom_r(_GLOBAL, seed & _MASK64, seq & _MASK64)
 
 
def random_r(rng):
    """
    uint32_t pcg32_random_r(pcg32_random_t* rng)
    {
        uint64_t oldstate = rng->state;
        rng->state = oldstate * 6364136223846793005ULL + rng->inc;
        uint32_t xorshifted = ((oldstate >> 18u) ^ oldstate) >> 27u;
        uint32_t rot = oldstate >> 59u;
        return (xorshifted >> rot) | (xorshifted << ((-rot) & 31));
    }
    """
    oldstate, inc = rng
    rng[0] = (oldstate * 6364136223846793005 + inc) & _MASK64
    xorshifted = (((oldstate >> 18) ^ oldstate) >> 27) & _MASK32
    rot = (oldstate >> 59) & _MASK32
    return ((xorshifted >> rot) | (xorshifted << ((-rot) & 31))) & _MASK32
 
def random():
    """
    uint32_t pcg32_random()
    {
        return pcg32_random_r(&pcg32_global);
    }
    """
    return random_r(_GLOBAL)
 
 
def boundedrand_r(rng, bound):
    """
    uint32_t pcg32_boundedrand_r(pcg32_random_t* rng, uint32_t bound)
    {
        // To avoid bias, we need to make the range of the RNG a multiple of
        // bound, which we do by dropping output less than a threshold.
        // A naive scheme to calculate the threshold would be to do
        //
        //     uint32_t threshold = 0x100000000ull % bound;
        //
        // but 64-bit div/mod is slower than 32-bit div/mod (especially on
        // 32-bit platforms).  In essence, we do
        //
        //     uint32_t threshold = (0x100000000ull-bound) % bound;
        //
        // because this version will calculate the same modulus, but the LHS
        // value is less than 2^32.
 
        uint32_t threshold = -bound % bound;
 
        // Uniformity guarantees that this loop will terminate.  In practice, it
        // should usually terminate quickly; on average (assuming all bounds are
        // equally likely), 82.25% of the time, we can expect it to require just
        // one iteration.  In the worst case, someone passes a bound of 2^31 + 1
        // (i.e., 2147483649), which invalidates almost 50% of the range.  In
        // practice, bounds are typically small and only a tiny amount of the range
        // is eliminated.
        for (;;) {
            uint32_t r = pcg32_random_r(rng);
            if (r >= threshold)
                return r % bound;
        }
    }
    """
    bound &= _MASK32
    threshold = ((-bound) & _MASK32) % bound
    while True:
        r = random_r(rng)
        if r >= threshold:
            return r % bound
 
def boundedrand(bound):
    """
    uint32_t pcg32_boundedrand(uint32_t bound)
    {
        return pcg32_boundedrand_r(&pcg32_global, bound);
    }
    """
    return boundedrand_r(_GLOBAL, bound)
 
 
if __name__ == '__main__':
    # // In this version of the code, we'll use a local rng, rather than the
    # // global one.
    # pcg32_random_t rng;
    rng = random_t()
    # // Seed with a fixed constant
    # pcg32_srandom_r(&rng, 42u, 54u);
    srandom_r(rng, 42, 54)
 
    print("""pcg32_random_r:
      -  result:      32-bit unsigned int (uint32_t)
      -  period:      2^64   (* 2^63 streams)
      -  state type:  pcg32_random_t (%i bytes)
      -  output func: XSH-RR\n""" % getsizeof(rng))
 
    #print("  Make some 32-bit numbers:")
    print('  32bit: ' + ' '.join('0x%08x' % random_r(rng) for _ in range(6)))
    #print("  Toss some coins:")
    print('  Coins: ' + ''.join('TH'[boundedrand_r(rng, 2)] for _ in range(65)))
    #print("  Roll a die:")
    print('  Rolls: ' + ' '.join('%i' % (1 + boundedrand_r(rng, 6)) for _ in range(33)))
 

Outputs match between the C and Python versions (well enough to show they are generating the same sequences - The C goes on to generate shuffled cards from a deck).

Python PCG in the Standard Lib.?

From the description of its properties I wouldn't be surprised if PCG, (the full-fat 64 bit version that passes the larger test-suite, unlike our current  Mersenne Twister algorithm), makes it into the Standard library, although I cannot tell if it would displace MT as the default generator for the random module any-time soon.