Go deh!

Mainly Tech projects on Python and Electronic Design Automation.

Sunday, March 01, 2015

Random number generator PCG32 translated to Python.

There's a new family of pseudo-random number generators called PCG going around with a great Video introducing the problem that it solves:


The site has a pcg32 baby implementation in C that I chose to convert to plain python to learn more about the algorithm.

The pcg32 C code is written to be quite lean-n-mean but Python doesn't do fast integer bit operations - its a higher level language, integers automatically extend to however bits are necessary for computing a result without over/underflow. I therefore needed to use 32 and 64 bit bit-masks to constrain results to the uint32 and uint64 types of the C code.

The C struct pcg_state_setseq_64 is mutable and passed between functions. I chose to model this with a two element Python list - the closest I could think of to a lightweight mutable namedtuple (as there is no such thing in the standard library).

The code is commented with the C-code it is derived from:

# -*- coding: utf-8 -*-
"""
Created on Thu Feb 26 22:58:33 2015
 
@author: Paddy3118
 
 
PCG32 translated from the C code:
  http://www.pcg-random.org/download.html#minimal-c-implementation
  Version pcg-c-basic-0.9
 
"""
 
from sys import getsizeof
 
 
_MASK32 = 2**32-1   # uint32_t
_MASK64 = 2**64-1   # uint64_t
 
 
def state_setseq_64(state=0, inc=0):
    """
    struct pcg_state_setseq_64 {    // Internals are *Private*.
        uint64_t state;             // RNG state.  All values are possible.
        uint64_t inc;               // Controls which RNG sequence (stream) is
                                    // selected. Must *always* be odd.
    };
    """
    return [state & _MASK64, inc & _MASK64]
# typedef struct pcg_state_setseq_64 pcg32_random_t;
random_t = state_setseq_64
 
# #define PCG32_INITIALIZER   { 0x853c49e6748fea9bULL, 0xda3e39cb94b95bdbULL }
# static pcg32_random_t pcg32_global = PCG32_INITIALIZER;
_GLOBAL = random_t(0x853c49e6748fea9b, 0xda3e39cb94b95bdb)
 
 
def srandom_r(rng, initstate, initseq):
    """
    void pcg32_srandom_r(pcg32_random_t* rng, uint64_t initstate, uint64_t initseq)
    {
        rng->state = 0U;
        rng->inc = (initseq << 1u) | 1u;
        pcg32_random_r(rng);
        rng->state += initstate;
        pcg32_random_r(rng);
    }
    """
    rng[::] = [0, (initseq << 1) & _MASK64 | 1]
    random_r(rng)
    rng[0] += initstate
    rng[0] &= _MASK64
    random_r(rng)
 
 
def srandom(seed, seq):
    """
    void pcg32_srandom(uint64_t seed, uint64_t seq)
    {
        pcg32_srandom_r(&pcg32_global, seed, seq);
    }
    """
    srandom_r(_GLOBAL, seed & _MASK64, seq & _MASK64)
 
 
def random_r(rng):
    """
    uint32_t pcg32_random_r(pcg32_random_t* rng)
    {
        uint64_t oldstate = rng->state;
        rng->state = oldstate * 6364136223846793005ULL + rng->inc;
        uint32_t xorshifted = ((oldstate >> 18u) ^ oldstate) >> 27u;
        uint32_t rot = oldstate >> 59u;
        return (xorshifted >> rot) | (xorshifted << ((-rot) & 31));
    }
    """
    oldstate, inc = rng
    rng[0] = (oldstate * 6364136223846793005 + inc) & _MASK64
    xorshifted = (((oldstate >> 18) ^ oldstate) >> 27) & _MASK32
    rot = (oldstate >> 59) & _MASK32
    return ((xorshifted >> rot) | (xorshifted << ((-rot) & 31))) & _MASK32
 
def random():
    """
    uint32_t pcg32_random()
    {
        return pcg32_random_r(&pcg32_global);
    }
    """
    return random_r(_GLOBAL)
 
 
def boundedrand_r(rng, bound):
    """
    uint32_t pcg32_boundedrand_r(pcg32_random_t* rng, uint32_t bound)
    {
        // To avoid bias, we need to make the range of the RNG a multiple of
        // bound, which we do by dropping output less than a threshold.
        // A naive scheme to calculate the threshold would be to do
        //
        //     uint32_t threshold = 0x100000000ull % bound;
        //
        // but 64-bit div/mod is slower than 32-bit div/mod (especially on
        // 32-bit platforms).  In essence, we do
        //
        //     uint32_t threshold = (0x100000000ull-bound) % bound;
        //
        // because this version will calculate the same modulus, but the LHS
        // value is less than 2^32.
 
        uint32_t threshold = -bound % bound;
 
        // Uniformity guarantees that this loop will terminate.  In practice, it
        // should usually terminate quickly; on average (assuming all bounds are
        // equally likely), 82.25% of the time, we can expect it to require just
        // one iteration.  In the worst case, someone passes a bound of 2^31 + 1
        // (i.e., 2147483649), which invalidates almost 50% of the range.  In
        // practice, bounds are typically small and only a tiny amount of the range
        // is eliminated.
        for (;;) {
            uint32_t r = pcg32_random_r(rng);
            if (r >= threshold)
                return r % bound;
        }
    }
    """
    bound &= _MASK32
    threshold = ((-bound) & _MASK32) % bound
    while True:
        r = random_r(rng)
        if r >= threshold:
            return r % bound
 
def boundedrand(bound):
    """
    uint32_t pcg32_boundedrand(uint32_t bound)
    {
        return pcg32_boundedrand_r(&pcg32_global, bound);
    }
    """
    return boundedrand_r(_GLOBAL, bound)
 
 
if __name__ == '__main__':
    # // In this version of the code, we'll use a local rng, rather than the
    # // global one.
    # pcg32_random_t rng;
    rng = random_t()
    # // Seed with a fixed constant
    # pcg32_srandom_r(&rng, 42u, 54u);
    srandom_r(rng, 42, 54)
 
    print("""pcg32_random_r:
      -  result:      32-bit unsigned int (uint32_t)
      -  period:      2^64   (* 2^63 streams)
      -  state type:  pcg32_random_t (%i bytes)
      -  output func: XSH-RR\n""" % getsizeof(rng))
 
    #print("  Make some 32-bit numbers:")
    print('  32bit: ' + ' '.join('0x%08x' % random_r(rng) for _ in range(6)))
    #print("  Toss some coins:")
    print('  Coins: ' + ''.join('TH'[boundedrand_r(rng, 2)] for _ in range(65)))
    #print("  Roll a die:")
    print('  Rolls: ' + ' '.join('%i' % (1 + boundedrand_r(rng, 6)) for _ in range(33)))
 

Outputs match between the C and Python versions (well enough to show they are generating the same sequences - The C goes on to generate shuffled cards from a deck).

Python PCG in the Standard Lib.?

From the description of its properties I wouldn't be surprised if PCG, (the full-fat 64 bit version that passes the larger test-suite, unlike our current  Mersenne Twister algorithm), makes it into the Standard library, although I cannot tell if it would displace MT as the default generator for the random module any-time soon.

Sunday, February 15, 2015

Find repeated blocks of characters in a string: char_rep().


I saw this Stack Overflow (SO) question which I thought was asking how to show the repeated characters in a string.

I immediately thought about my earlier code from post "Find repeated blocks of lines in a text file: The block_lines tool." and immediately thought of  modifying it to get a similar result i.e. using the in-built regular expression engine to find the repetitions.

in function char_rep, a regular expression finds a section of repeated characters in a string:
    r"""(?ms)(?P<repeat>(?P<chars>.+?)(?:(?P=chars))+)"""

We find all those repeated sections in rightward order through the string as  well as detecting intervening non-repeated blocks of characters and format them appropriately and extend the output string.

The _format_* functions allow the output to be expressed as a either a regular expression that matches the input text; or alternatively as output like that asked for in the SO question.

The code

#!/bin/env python
"""
Extract repeated blocks of chars from a string
"""
from __future__ import print_function
 
import re
 
 
__all__ = ['char_rep']
__version__ = '0.0.1'
DEBUG = False
 
 
def _format_re(bchars, brep):
    'annotated in regexp-like format, repeated block of chars'
    bchars = re.escape(bchars)
    if brep > 1:
        return '(%s){%i}' % (bchars, brep)
    else:
        return bchars
 
def _format_so(bchars, brep):
    'brep(bchars) format for Stack Overflow: http://stackoverflow.com/questions/28519559/compressing-string-with-repeating-chars'
    return '%i(%s)' % (brep, bchars) if bchars else ''
 
def char_rep(txt, debug=False, _format=_format_re):
    'return repeated blocks of chars in text with their repeat count'
    output,  lastend = [], 0
    for match in re.finditer(r"""(?ms)(?P<repeat>(?P<chars>.+?)(?:(?P=chars))+)""", txt):
        if debug: print('\n  lastend, txt[lastend:] = ',lastend, repr(txt[lastend:]))
        beginpos, endpos = match.span()
        repeat, chars = match.group('repeat'), match.group('chars')
        if lastend < beginpos:  # Skipped a non-repeated, 'single' block
            output.append(_format(txt[lastend:beginpos], 1))
        output.append(_format(chars, repeat.count(chars)))
        lastend = endpos
        if debug:
            print('  match.span() = ', match.span())
            print("  match.group['repeat'] = ", repeat)
            print("  match.group['chars'] = ", chars)
            print("  repeatcount = ", repeat.count(chars))
            print("  output so far = %r" % ''.join(output))
    output = ''.join(output) + _format(txt[lastend:], 1)
    if debug: print("return %r" % output)
    return output
 
 
if __name__ == '__main__':
    for n, txt in enumerate('''
    ULULUL 
    LDDLDDLDDLDDLXXLCCLCCLCC
    LDDLDDLDDLDDLXXLCCLCCLCCC
    DDLDDLDDLDDLDDLDDLDDLDDLDDLDDL
    LDDLDDLDDLDDLDDLDDLDDLDDLDDLDD
    LLLLDLLLLDLLLLD 
    '''.strip().split(), 1):
        output_re = char_rep(txt, debug=DEBUG, _format=_format_re)
        assert re.match('^%s$' % output_re, txt), "Output is not an RE to match all of input string!"
        output_so = char_rep(txt, debug=DEBUG, _format=_format_so)
        print("String %i: %r\n  re_formatted = %r\n  so_formatted = %r\n"
               % (n, txt, output_re, output_so))
 


Code output


String 1: 'ULULUL'
  re_formatted = '(UL){3}'
  so_formatted = '3(UL)'
 
String 2: 'LDDLDDLDDLDDLXXLCCLCCLCC'
  re_formatted = '(LDD){4}L(X){2}(LCC){3}'
  so_formatted = '4(LDD)1(L)2(X)3(LCC)'
 
String 3: 'LDDLDDLDDLDDLXXLCCLCCLCCC'
  re_formatted = '(LDD){4}L(X){2}(LCC){3}C'
  so_formatted = '4(LDD)1(L)2(X)3(LCC)1(C)'
 
String 4: 'DDLDDLDDLDDLDDLDDLDDLDDLDDLDDL'
  re_formatted = '(D){2}(LDD){9}L'
  so_formatted = '2(D)9(LDD)1(L)'
 
String 5: 'LDDLDDLDDLDDLDDLDDLDDLDDLDDLDD'
  re_formatted = '(LDD){10}'
  so_formatted = '10(LDD)'
 
String 6: 'LLLLDLLLLDLLLLD'
  re_formatted = '(L){4}(DLLLL){2}D'
  so_formatted = '4(L)2(DLLLL)1(D)'
 

You should note that we are at the mercy of the regular expression so that string 6 is actually 'LLLLD' repeated three times, but the program matches it differently. I guess I could have several repeat finding regular expressions and use the one that gives the most succinct value.

Debug mode

set DEBUG to True or call char_rep(txt, debug=True) in the code and you get a listing of the intermediate values from function char_rep.

END.


Friday, October 10, 2014

Python on the RosettaCode.org site

Do you know about the Rosetta code site?

Its purpose is to allow the comparison of different programming languages by having tasks that are completed in idiomatic code from many programming languages - the idea being that because the code examples are completing the same task, you can more easily compare the languages examples.

The Python language is well represented on the site with a large percentage of the tasks completed.

The type of the examples are written in what could be grouped into two styles:

  1. Python code suitable for cutting and pasting into a .py file and executing from the file.
  2. Sessions copied from the REPL of usually an idle session as it to is an idiomatic way to use Python, and and is ideal for some of the tasks with short Python solutions.
The description of the task is assumed understood and rarely appears in code comments and docstrings. Code is rarely "optimised" as speed and memory constraints are rarely mentioned in tasks (Rosetta Code is not geared to run examples, verify timings and memory use, ...).

I start off the writing of several tasks for Rosetta Code and always accompany those with an initial Python solution, I hope that this means more users of other languages will read the Python solution as it starts as the "canonical" solution for those wanting to see how other languages approach the task

If you want to contribute, you could lurk on the site for a while. Learn how other people edit. Maybe try some of the tasks for yourself, and see how things lie before making your first edit.

Some tips:


  • One language with multiple examples is discouraged unless you are using a different algorithm or a different language style. (You might see recursive/non recursive, or OO/functional style versions occasionally, depending also on the task).
  • Improving what is there is encouraged - replacing something because you wrote a version is not encouraged.
  • We would like RC to be made available in schools and colleges so don't use language that might fail a dumb site checker. (A bit difficult with the BrainF*ck language but we try and use obfuscations like brainf*ck where we can).  

In short:

Come join us. It's fun!

Sunday, September 07, 2014

Find repeated blocks of lines in a text file: The block_lines tool.

I am doing some accelerated testing of a microcontroller running some diagnostic software that is put in a harsh environment and communicates its state and any firmware determinable errors it detects back to a controlling PC via a USB port.
Ultimately I decode from the data stream a series of appropriately named records interspersed with a few corrupted bytes from the remains of unrecoverable records.
When trying to assess what the microcontroller was doing, (as opposed to what it should be doing in normal operation), I found myself at one stage with a list of recieved records that simplify to something like this:
In [2]:
with open('example_records.log') as f:
    record_lines = f.read()
print(record_lines)
RESET1
RESET3
RESET1
RESET3
RESET1
RESET3
ERROR3
DATUM
ERROR3
DATUM
ERROR3
DATUM
CHANGE
RESET1
CHANGE
ERROR1
ERROR1
ERROR3
RESET1
ERROR3
RESET1
ERROR3
RESET1
ERROR1
STAGED
ERROR1
STAGED
DATUM
ERROR1
RESET1
DATUM
RESET1
ERROR2
DATUM
ERROR1
RESET1
DATUM
RESET1
ERROR2
DATUM
ERROR1
RESET1
DATUM
RESET1
ERROR2
ERROR2
RESET3
ERROR2
RESET3
RESET2
RESET1
RESET2
RESET1
ERROR2
RESET3
DATUM
CHANGE
RESET2
RESET1
RESET2
DATUM
CHANGE
DATUM
CHANGE
DATUM
CHANGE
RESET1
RESET1
ERROR1
ERROR3
ERROR2
RESET2
DATUM
RESET2
RESET3
STAGED
ERROR1
ERROR3
ERROR2
RESET2
DATUM
RESET2
RESET3
STAGED
ERROR1
ERROR3
ERROR2
RESET2
DATUM
RESET2
RESET3
STAGED
STAGED
CHANGE
ERROR3
ERROR2
RESET3
CHANGE
ERROR1
ERROR1


Meaning from chaos

When discussing with colleagues what was wrong with the above record sequence and what could be the cause, we spent a lot of time looking for repeated patterns in the series of records by eye.
I searched for, and could not find an existing tool to find repeated blocks of whole lines in a file. Stackoverflow gave me no more than my own suggestions so I decided to develop my own utility.

Fun with Regexps

My idea was to investigate if some regexp could extract repeated blocks of lines. If it's regexp exploration, then it is out with Kodos.
Cut'n'paste the example records into the Search string box, then I can develop the regexp in the top window.
In [2]:
from IPython.core.display import Image 
Image(filename='files/block_rep_in_kodos.png') 
Out[2]:
The essence of this first regexp of:
(?ms)(?P<repeat>(?P<lines>^.*?$)(?:\n(?P=lines))+)
is read in sections from the inside out as:
^.*?$             # Match a series of whole lines, non-greedily due to the ?
(?P<lines>^.*?$)  # Give the matched lines the name "lines"
(?:\n(?P=lines))+ # Match repetitions of '\n' followed by the group named "lines"
(?P<repeat>(?P<lines>^.*?$)(?:\n(?P=lines))+) # call the repeated block of lines "repeat"

The kodos Group window shows the first match where the two "lines" of "RESET13" are repeated three times to give the "repeat" group.
Use regexp.findall with the above regexp and I created the heart of my script.

The block_lines utility

The final utility was in three parts:
  1. block_gen, to create test data of records with optional debug information showing the repeats in the generated blocks
  2. block_rep, to extract and show the repeated blocks.
  3. block_exp, to expand output from block_rep so you can round-trip the data.

block_gen output

In this sample generated in debug mode, everything matching ' # .*' at the end of any line is debug comments showing what is repeated lines between curly braces), and by how many times.
For example in this:
In [3]:
RESET1 # 3 {}
RESET1
RESET1
RESET3 # 1 {
ERROR2
ERROR1 # }
STAGED # 1 {
ERROR1
CHANGE
ERROR3
RESET2
RESET1
CHANGE # }
ERROR3 # 2 {
RESET2
RESET2
STAGED
STAGED
ERROR2
RESET2
RESET1 # }
ERROR3
RESET2
RESET2
STAGED
STAGED
ERROR2
RESET2
RESET1
DATUM # 1 {
RESET3 # }
ERROR2 # 2 {
ERROR1 # }
ERROR2
ERROR1
DATUM # 1 {
DATUM
ERROR2 # }
ERROR2 # 1 {
DATUM
RESET1 # }
...

The comment of ' # 3 {}' means that the first line 'RESET1' is repeated three times.
The fourth line starts a block that occurs only 1 time and ends at line 6 because of the closing curly brace.

block_rep output

Although block_gen can show what repetitions were used when generating record sequences, block_rep is likely to find its own set of repeated blocks as the generator might generated a record repeated twice then generate the same record repeated three times more. block_rep may treat that as the one block repeated five times.
block_rep takes a file of lines and in debug mode strips off the ' #.
In [2]:
  3 {}  RESET1
  1 {   RESET3
        ERROR2
        ERROR1
        STAGED
        ERROR1
        CHANGE
        ERROR3
        RESET2
        RESET1
     }  CHANGE
  2 {   ERROR3
        RESET2
        RESET2
        STAGED
        STAGED
        ERROR2
        RESET2
     }  RESET1
  1 {   DATUM
     }  RESET3
  2 {   ERROR2
     }  ERROR1
  2 {}  DATUM
  2 {}  ERROR2
'''

Note that in this form the repetition information is on the left and although the repeat counts of each block is present, only the block that is repeated is shown - not the expanded repetitions.

Docopt

I used the docopt Python library for the argument handling of the final utility:
PS C:\Users\Paddy\Google Drive\Code> python .\block_lines -h

Extract repeated blocks of lines information from a file

Usage:
  block_lines (rep|exp) [options]  [INPUT-FILE [OUTPUT-FILE]]
  block_lines gen [options]  [OUTPUT-FILE]
  block_lines (-h|--help|--version)

Modes:
  rep               Find the repeated blocks of lines.
  exp               Expand the output from rep.
  gen               (Random gen of output suitable for example rep mode processing).

Options:
  -h --help         Show this screen.
  -v --version      Show version.
  -w N, --width N   Digits in field for repetition count [default: 4].
  -d --debug        Debug:
                      * In rep mode remove anything after '#' on all input lines.
                      * In gen mode annotate with repeat counts after '#'.

I/O Defaults:
  INPUT-FILE        Defaults to stdin.
  OUTPUT-FILE       Defaults to stdout.

Comments

  • Note that I did not have to think too hard about an algorithm for extracting repeated blocks - I make the regexp library take the strain.
  • It is fast enough for my purposes - I do slurp the whole file into memory at once for example, but that is fine for my real world data so no (over) optimization is needed.
  • The utility proved invaluable in being able to discuss and come to a consesnsus with colleagues about some experimental data we had. We could condense tens of thousands of lines, without bias, and reason about the result.
  • Real-world data might need pre-conditioning before being able to usufully apply the utility, for example: fixed fields of varying value such as indices, timestamps, and maybe usernames, might need to be deleted or replaced with a fixed dummy value.

Show me the code!

License: It's mine, mine mine do you hear me!
In [3]:
#!/bin/env python
"""
Extract repeated blocks of lines information from a file

Usage:
  block_lines (rep|exp) [options]  [INPUT-FILE [OUTPUT-FILE]]
  block_lines gen [options]  [OUTPUT-FILE]
  block_lines (-h|--help|--version)

Modes:
  rep               Find the repeated blocks of lines.
  exp               Expand the output from rep.
  gen               (Random gen of output suitable for example rep mode processing).

Options:
  -h --help         Show this screen.
  -v --version      Show version.
  -w N, --width N   Digits in field for repetition count [default: 4].
  -d --debug        Debug:
                      * In rep mode remove anything after '#' on all input lines.
                      * In gen mode annotate with repeat counts after '#'.

I/O Defaults:
  INPUT-FILE        Defaults to stdin.
  OUTPUT-FILE       Defaults to stdout.

"""
from __future__ import print_function

import re
import sys
import random

from docopt import docopt


__all__ = 'block_rep block_exp block_gen'.split()
__version__ = '0.0.1'


# noinspection PyShadowingNames,PyUnusedLocal
def _compressed(output, blines, brep, width):
    """output += annotated, repeated block of lines"""
    if len(blines) == 1:
        output += [' %*i {}  %s' % (width, brep, blines[0])]
    else:
        output += [' %*i {   %s' % (width, brep, blines[0])]
        output += [' %*s     %s' % (width, '', singleline)
                   for singleline in blines[1:-1]]
        output += [' %*s  }  %s' % (width, '', blines[-1])]


# noinspection PyShadowingNames
def block_rep(text, width=3):
    """return repeated blocks of lines in text with their repeat count"""
    output = []
    lastend = 0
    for match in re.finditer(r"""(?ms)(?P<repeat>(?P<lines>^.*?$)(?:\n(?P=lines))+)""", text):
        beginpos, endpos = match.span()
        if lastend < beginpos:  # Skipped a non-repeated, 'single' block
            _compressed(output,
                        blines=text[lastend:beginpos - 1].split('\n'),
                        brep=1,
                        width=width)
        bsize, repeatedlines = sorted(x.count('\n') + 1 for x in match.groupdict().values())
        _compressed(output,
                    blines=match.groupdict()['lines'].split('\n'),
                    brep=repeatedlines // bsize,
                    width=width)
        lastend = endpos + 1
    if lastend < len(text) - 1:  # Add the non-repeated, 'single' block at end
        _compressed(output,
                    blines=text[lastend:len(text)].split('\n'),
                    brep=1,
                    width=width)
    return '\n'.join(output)


# noinspection PyShadowingNames
def block_exp(text):
    """Expand lines"""
    # The column that lines start at after the block repetition info to the left.
    tagcolumn = len(re.search(r"""(?m)^\s+}  """, text).group())
    output = []
    for match in re.finditer(r"""(?msx)(?P<block>
                              (?P<multiline>
                                ^ \s* (?P<rep> \d+)  \s+ [{] \s\s\s (?P<first> .*?$)
                                (?P<middle> .*?$) \n
                                (?:  (?P<repcolumn>^\s+ } \s\s )(?P<last> [^\n]*))
                              )
                             |
                              (?P<singleline>
                                ^ \s* (?P<srep> \d+)  \s+ [{][}] \s\s (?P<only> .*?$)
                              )
                            )""", text):
        if match.group('multiline'):
            (rep, first, middle, repcolumn,
             last) = [match.group(name)
                      for name in 'rep, first, middle, repcolumn, last'.split(', ')]
            rep = int(rep)
            blocklines = [first]
            # The column that lines start at after the block repetition info to the left.
            tagcolumn = len(repcolumn)
            if middle and middle[0] == '\n':
                middle = middle[1:]
            blocklines += [line[tagcolumn:] for line in middle.split('\n') if middle]
            blocklines += [last]
            output += blocklines * rep
        elif match.group('singleline'):
            srep, only = [match.group(name) for name in 'srep, only'.split(', ')]
            srep = int(srep)
            output += [only] * srep
    return '\n'.join(output)


def block_gen(tags='DATUM ERROR1 ERROR2 ERROR3 CHANGE RESET1 RESET2 RESET3 STAGED'.split(),
              minblocks=100, debug=False):
    """Generate repeated blocks of lines to be later found by block_rep"""
    series = []
    while len(series) < minblocks:
        blocksize = min(len(tags), random.choice([1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 6, 7, 8]))
        blockrep = random.choice([1, 1, 1, 2, 2, 3])
        blocklines = [random.choice(tags) for _ in range(blocksize)]
        expanded = blocklines * blockrep
        if debug:  # Add annotations
            expanded[0] += '\t# %i {' % blockrep  # Annotate with repeat count and block start
            if blocksize == 1:
                expanded[0] += '}'  # Annotate with end of 1 line block
            else:
                expanded[blocksize - 1] += '\t# }'  # Annotate with end of repeated block
        series += expanded
    return '\n'.join(series)


if __name__ == '__main__':
    arguments = docopt(__doc__, version=__version__)
    arguments["--width"] = int(arguments["--width"])
    if arguments["rep"] or arguments["exp"]:
        with (sys.stdin
              if arguments["INPUT-FILE"] is None
              else open(arguments["INPUT-FILE"], 'r')) as f:
            text = f.read()
    #
    if arguments["rep"] and arguments["--debug"]:
        # Remove mode gen-type  comments
        # noinspection PyUnboundLocalVariable
        text = re.sub(r'\s+# .*', '', text)
    #
    if arguments["rep"]:
        # noinspection PyUnboundLocalVariable
        output = block_rep(text, width=arguments["--width"])
    elif arguments["exp"]:
        # noinspection PyUnboundLocalVariable
        output = block_exp(text)
    elif arguments["gen"]:
        # noinspection PyUnboundLocalVariable
        output = block_gen(debug=arguments["--debug"])
    if arguments["rep"] or arguments["exp"] or arguments["gen"]:
        with (sys.stdout
              if arguments["OUTPUT-FILE"] is None
              else open(arguments["OUTPUT-FILE"], 'w')) as f:
            f.write(output)

Followers

Subscribe Now: google

Add to Google Reader or Homepage

Go deh too!

whos.amung.us

Blog Archive