#!/bin/env python
"""
Extract repeated blocks of lines information from a file
Usage:
block_lines (rep|exp) [options] [INPUT-FILE [OUTPUT-FILE]]
block_lines gen [options] [OUTPUT-FILE]
block_lines (-h|--help|--version)
Modes:
rep Find the repeated blocks of lines.
exp Expand the output from rep.
gen (Random gen of output suitable for example rep mode processing).
Options:
-h --help Show this screen.
-v --version Show version.
-w N, --width N Digits in field for repetition count [default: 4].
-d --debug Debug:
* In rep mode remove anything after '#' on all input lines.
* In gen mode annotate with repeat counts after '#'.
I/O Defaults:
INPUT-FILE Defaults to stdin.
OUTPUT-FILE Defaults to stdout.
"""
from __future__ import print_function
import re
import sys
import random
from docopt import docopt
# Public API of the module: one function per command-line mode (rep / exp / gen).
# The list of names is built by splitting the string on whitespace.
__all__ = 'block_rep block_exp block_gen'.split()
# Version string; the script entry point hands it to docopt as ``version=``.
__version__ = '0.0.1'
# noinspection PyShadowingNames,PyUnusedLocal
def _compressed(output, blines, brep, width):
"""output += annotated, repeated block of lines"""
if len(blines) == 1:
output += [' %*i {} %s' % (width, brep, blines[0])]
else:
output += [' %*i { %s' % (width, brep, blines[0])]
output += [' %*s %s' % (width, '', singleline)
for singleline in blines[1:-1]]
output += [' %*s } %s' % (width, '', blines[-1])]
# noinspection PyShadowingNames
def block_rep(text, width=3):
    """return repeated blocks of lines in text with their repeat count

    text  -- the whole input as one string, lines separated by '\\n'.
    width -- field width for the repetition count, passed to _compressed.

    Scans ``text`` for a run of one or more lines that is immediately
    followed by one or more identical copies of itself.  Each such run is
    emitted once with its repeat count; the stretches of text between (and
    after) the runs are emitted as blocks with a count of 1.  Formatting of
    each block is delegated to _compressed.  Returns the annotated lines
    joined with '\\n'.
    """
    output = []
    lastend = 0
    # The trailing '$' forces the last copy to end at a line end.  Without it
    # the back-reference could match just the start of the following line
    # (e.g. "A" then "AB"), over-counting and swallowing the rest of that line.
    pattern = r"""(?ms)(?P<repeat>(?P<lines>^.*?$)(?:\n(?P=lines))+)$"""
    for match in re.finditer(pattern, text):
        beginpos, endpos = match.span()
        if lastend < beginpos:  # Skipped a non-repeated, 'single' block
            # beginpos - 1 drops the newline that separates it from the match.
            _compressed(output,
                        blines=text[lastend:beginpos - 1].split('\n'),
                        brep=1,
                        width=width)
        blines = match.group('lines').split('\n')
        repeatedlines = match.group('repeat').count('\n') + 1
        _compressed(output,
                    blines=blines,
                    brep=repeatedlines // len(blines),
                    width=width)
        lastend = endpos + 1  # Step over the newline after the match
    if lastend < len(text):  # Add the non-repeated, 'single' block at end
        # Compare against len(text), not len(text) - 1, so that a final block
        # that is only one character long is not dropped.
        _compressed(output,
                    blines=text[lastend:].split('\n'),
                    brep=1,
                    width=width)
    return '\n'.join(output)
# noinspection PyShadowingNames
def block_exp(text):
    """Expand lines

    Reverses the annotated block format: ``text`` is searched for

    * single-line blocks: a count, whitespace, '{}', two whitespace
      characters, then the line; and
    * multi-line blocks: a count, whitespace, '{', three whitespace
      characters and the first line; any number of middle lines; and a
      closing row of whitespace, '}', two whitespace characters and the
      last line.

    Each block's lines are repeated ``count`` times.  Returns all expanded
    lines joined with '\\n' (an empty string when nothing matches).
    """
    output = []
    for match in re.finditer(r"""(?msx)(?P<block>
        (?P<multiline>
            ^ \s* (?P<rep> \d+) \s+ [{] \s\s\s (?P<first> .*?$)
            (?P<middle> .*?$) \n
            (?: (?P<repcolumn>^\s+ } \s\s )(?P<last> [^\n]*))
        )
        |
        (?P<singleline>
            ^ \s* (?P<srep> \d+) \s+ [{][}] \s\s (?P<only> .*?$)
        )
        )""", text):
        if match.group('multiline'):
            rep = int(match.group('rep'))
            middle = match.group('middle')
            # The column that lines start at after the block repetition info
            # to the left; taken per block from its closing '}' row, so input
            # without any multi-line block needs no such row at all.
            tagcolumn = len(match.group('repcolumn'))
            blocklines = [match.group('first')]
            middlelines = middle.split('\n') if middle else []
            if middlelines and middlelines[0] == '':
                # 'middle' begins with the newline that ended the first line.
                middlelines = middlelines[1:]
            blocklines += [line[tagcolumn:] for line in middlelines]
            blocklines += [match.group('last')]
            output += blocklines * rep
        elif match.group('singleline'):
            output += [match.group('only')] * int(match.group('srep'))
    return '\n'.join(output)
def block_gen(tags='DATUM ERROR1 ERROR2 ERROR3 CHANGE RESET1 RESET2 RESET3 STAGED'.split(),
              minblocks=100, debug=False):
    """Generate repeated blocks of lines to be later found by block_rep

    tags      -- non-empty sequence of strings; every generated line is one
                 of these, picked at random.  (The default list is shared
                 between calls but is only ever read.)
    minblocks -- generation stops once at least this many *lines* have been
                 produced (the value is compared with the line count).
    debug     -- when true, the first line of each block gets a
                 '<TAB># <repeat count> {' annotation and the last line of
                 its first copy gets '<TAB># }' ('{}' on one line for a
                 one-line block).

    Returns the generated lines joined with '\\n'.
    Raises ValueError if ``tags`` is empty.
    """
    if not tags:
        # With no tags every block would be empty, so the loop below could
        # never reach minblocks (or would index an empty list in debug mode).
        raise ValueError('block_gen needs at least one tag to choose from')
    series = []
    while len(series) < minblocks:
        blocksize = min(len(tags), random.choice([1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 6, 7, 8]))
        blockrep = random.choice([1, 1, 1, 2, 2, 3])
        blocklines = [random.choice(tags) for _ in range(blocksize)]
        expanded = blocklines * blockrep
        if debug:  # Add annotations
            expanded[0] += '\t# %i {' % blockrep  # Annotate with repeat count and block start
            if blocksize == 1:
                expanded[0] += '}'  # Annotate with end of 1 line block
            else:
                expanded[blocksize - 1] += '\t# }'  # Annotate with end of repeated block
        series += expanded
    return '\n'.join(series)
if __name__ == '__main__':
    # Command-line entry point: parse the options, obtain the text for the
    # selected mode, then write the result to the chosen destination.
    arguments = docopt(__doc__, version=__version__)
    arguments["--width"] = int(arguments["--width"])
    rep_mode, exp_mode, gen_mode = (arguments[mode] for mode in ('rep', 'exp', 'gen'))

    if rep_mode or exp_mode:
        # Both of these modes consume input: a named file, or stdin by default.
        in_name = arguments["INPUT-FILE"]
        with (open(in_name, 'r') if in_name is not None else sys.stdin) as f:
            text = f.read()
        if rep_mode and arguments["--debug"]:
            # Remove mode gen-type comments
            text = re.sub(r'\s+# .*', '', text)
        if rep_mode:
            output = block_rep(text, width=arguments["--width"])
        else:
            output = block_exp(text)
    elif gen_mode:
        output = block_gen(debug=arguments["--debug"])

    if rep_mode or exp_mode or gen_mode:
        # Every mode writes its result: to a named file, or stdout by default.
        out_name = arguments["OUTPUT-FILE"]
        # noinspection PyUnboundLocalVariable
        with (open(out_name, 'w') if out_name is not None else sys.stdout) as f:
            f.write(output)