The site has a pcg32 baby implementation in C that I chose to convert to plain python to learn more about the algorithm.
The pcg32 C code is written to be quite lean-n-mean but Python doesn't do fast integer bit operations - its a higher level language, integers automatically extend to however bits are necessary for computing a result without over/underflow. I therefore needed to use 32 and 64 bit bit-masks to constrain results to the uint32 and uint64 types of the C code.
The C struct pcg_state_setseq_64 is mutable and passed between functions. I chose to model this with a two element Python list - the closest I could think of to a lightweight mutable namedtuple (as there is no such thing in the standard library).
The code is commented with the C-code it is derived from:
# -*- coding: utf-8 -*- """ Created on Thu Feb 26 22:58:33 2015 @author: Paddy3118 PCG32 translated from the C code: http://www.pcg-random.org/download.html#minimal-c-implementation Version pcg-c-basic-0.9 """ from sys import getsizeof _MASK32 = 2**32-1 # uint32_t _MASK64 = 2**64-1 # uint64_t def state_setseq_64(state=0, inc=0): """ struct pcg_state_setseq_64 { // Internals are *Private*. uint64_t state; // RNG state. All values are possible. uint64_t inc; // Controls which RNG sequence (stream) is // selected. Must *always* be odd. }; """ return [state & _MASK64, inc & _MASK64] # typedef struct pcg_state_setseq_64 pcg32_random_t; random_t = state_setseq_64 # #define PCG32_INITIALIZER { 0x853c49e6748fea9bULL, 0xda3e39cb94b95bdbULL } # static pcg32_random_t pcg32_global = PCG32_INITIALIZER; _GLOBAL = random_t(0x853c49e6748fea9b, 0xda3e39cb94b95bdb) def srandom_r(rng, initstate, initseq): """ void pcg32_srandom_r(pcg32_random_t* rng, uint64_t initstate, uint64_t initseq) { rng->state = 0U; rng->inc = (initseq << 1u) | 1u; pcg32_random_r(rng); rng->state += initstate; pcg32_random_r(rng); } """ rng[::] = [0, (initseq << 1) & _MASK64 | 1] random_r(rng) rng[0] += initstate rng[0] &= _MASK64 random_r(rng) def srandom(seed, seq): """ void pcg32_srandom(uint64_t seed, uint64_t seq) { pcg32_srandom_r(&pcg32_global, seed, seq); } """ srandom_r(_GLOBAL, seed & _MASK64, seq & _MASK64) def random_r(rng): """ uint32_t pcg32_random_r(pcg32_random_t* rng) { uint64_t oldstate = rng->state; rng->state = oldstate * 6364136223846793005ULL + rng->inc; uint32_t xorshifted = ((oldstate >> 18u) ^ oldstate) >> 27u; uint32_t rot = oldstate >> 59u; return (xorshifted >> rot) | (xorshifted << ((-rot) & 31)); } """ oldstate, inc = rng rng[0] = (oldstate * 6364136223846793005 + inc) & _MASK64 xorshifted = (((oldstate >> 18) ^ oldstate) >> 27) & _MASK32 rot = (oldstate >> 59) & _MASK32 return ((xorshifted >> rot) | (xorshifted << ((-rot) & 31))) & _MASK32 def random(): """ uint32_t pcg32_random() { return pcg32_random_r(&pcg32_global); } """ return random_r(_GLOBAL) def boundedrand_r(rng, bound): """ uint32_t pcg32_boundedrand_r(pcg32_random_t* rng, uint32_t bound) { // To avoid bias, we need to make the range of the RNG a multiple of // bound, which we do by dropping output less than a threshold. // A naive scheme to calculate the threshold would be to do // // uint32_t threshold = 0x100000000ull % bound; // // but 64-bit div/mod is slower than 32-bit div/mod (especially on // 32-bit platforms). In essence, we do // // uint32_t threshold = (0x100000000ull-bound) % bound; // // because this version will calculate the same modulus, but the LHS // value is less than 2^32. uint32_t threshold = -bound % bound; // Uniformity guarantees that this loop will terminate. In practice, it // should usually terminate quickly; on average (assuming all bounds are // equally likely), 82.25% of the time, we can expect it to require just // one iteration. In the worst case, someone passes a bound of 2^31 + 1 // (i.e., 2147483649), which invalidates almost 50% of the range. In // practice, bounds are typically small and only a tiny amount of the range // is eliminated. for (;;) { uint32_t r = pcg32_random_r(rng); if (r >= threshold) return r % bound; } } """ bound &= _MASK32 threshold = ((-bound) & _MASK32) % bound while True: r = random_r(rng) if r >= threshold: return r % bound def boundedrand(bound): """ uint32_t pcg32_boundedrand(uint32_t bound) { return pcg32_boundedrand_r(&pcg32_global, bound); } """ return boundedrand_r(_GLOBAL, bound) if __name__ == '__main__': # // In this version of the code, we'll use a local rng, rather than the # // global one. # pcg32_random_t rng; rng = random_t() # // Seed with a fixed constant # pcg32_srandom_r(&rng, 42u, 54u); srandom_r(rng, 42, 54) print("""pcg32_random_r: - result: 32-bit unsigned int (uint32_t) - period: 2^64 (* 2^63 streams) - state type: pcg32_random_t (%i bytes) - output func: XSH-RR\n""" % getsizeof(rng)) #print(" Make some 32-bit numbers:") print(' 32bit: ' + ' '.join('0x%08x' % random_r(rng) for _ in range(6))) #print(" Toss some coins:") print(' Coins: ' + ''.join('TH'[boundedrand_r(rng, 2)] for _ in range(65))) #print(" Roll a die:") print(' Rolls: ' + ' '.join('%i' % (1 + boundedrand_r(rng, 6)) for _ in range(33)))
Outputs match between the C and Python versions (well enough to show they are generating the same sequences - The C goes on to generate shuffled cards from a deck).
Python PCG in the Standard Lib.?
From the description of its properties I wouldn't be surprised if PCG, (the full-fat 64 bit version that passes the larger test-suite, unlike our current Mersenne Twister algorithm), makes it into the Standard library, although I cannot tell if it would displace MT as the default generator for the random module any-time soon.
No comments:
Post a Comment