In [1]:
#!pip install import_ipynb
In [2]:
import import_ipynb
import set_consolidation_speed
from set_consolidation_speed import consolidate, consolidate_np, consolidate_ramr, \
weighted_choice
In [3]:
import random
from pprint import pprint as pp
def rand_data_gen2(n=5, aspect=1, silent=False):
    """Gen n//aspect sublists of randomised data with average set size multiplied by aspect.

    Returns (data, avg) where data is a list of lists of item-name strings
    and avg is the mean sublist length.  Prints summary stats unless silent.

    Fix: the one-line description used to sit as a bare string inside a dead
    `if not silent:` block, so the function had no docstring and the `if`
    did nothing — it is now a real docstring.
    """
    # Need a pool of items to randomly select for each group
    n_pool_items = int(2.5 * n)
    pool_strings = [f'I{x}' for x in range(n_pool_items)]
    # groups will be of random but weighted size,
    group_len_weights = ([(x, 5) for x in range(1, 6)] +    # lengths of 1..5 weights
                         [(x, 2) for x in range(6, 10)] +   # lengths of 6..9 weights
                         [(x, 1) for x in range(10, 16)])   # lengths of 10..15 weights
    # Each draw takes `length * aspect` items, so filter on that product —
    # filtering on the bare length (as before) let random.sample() ask for
    # more items than the pool holds and raise ValueError for large aspects.
    group_len_weights = [(ln, wt) for ln, wt in group_len_weights
                         if ln * aspect <= n_pool_items]
    group_len_gen = weighted_choice(group_len_weights)
    data = [random.sample(pool_strings, next(group_len_gen) * aspect)
            for _ in range(n // aspect)]
    # stats — guard the mean: n < aspect yields zero rows, which used to
    # divide by zero here.
    avg = (sum(len(x) for x in data) / len(data)) if data else 0
    if not silent:
        print(f"""
GENERATED DATA STATS:
    n: {n}
    Aspect ratio: {aspect}
    Row count: {len(data)}
    Pool of random items: {n_pool_items}
    Average count of items per group: {avg}
""")
    return data, avg
In [4]:
# This is just for repeatability and can be deleted
# (seeds the global `random` module PRNG so rand_data_gen2 produces the
# same data on every full run of the notebook).
random.seed('RANDOM TEST DATA GENERATION')
In [5]:
if __name__ == '__main__':
    # Smoke-test the generator: print stats for 2**15 items at each of
    # three aspect ratios (the returned data is deliberately discarded).
    for ratio in (1, 2, 4):
        rand_data_gen2(2**15, ratio)
In [6]:
import timeit
import datetime
import pandas as pd
import copy
def runtime2(methods, random_data):
    "Time the methods acting on the data"
    rep = 2
    # Stash the data on the function object so the timeit setup string can
    # reach it via `from __main__ import ... runtime2` — keeps each timed
    # run working on a fresh deep copy rather than mutated leftovers.
    runtime2.random_data = random_data
    timings = []
    for fn in methods:
        name = fn.__name__
        cmd = f"{name}(ddd)"
        setup = (f"from __main__ import copy, {name}, runtime2\n"
                 "ddd = copy.deepcopy(runtime2.random_data)")
        # Average of `rep` runs, reported as a timedelta for readability.
        per_run = timeit.Timer(cmd, setup).timeit(number=rep) / rep
        timings.append(datetime.timedelta(seconds=per_run))
    return timings
def method_compare(methods, rows=[128, 256], aspects=[1, 2, 4]):
    "Compare timings of methods with data generated from specified row counts and aspect ratios"
    # Column names for the stats part of the table.  NOTE: split() keeps the
    # punctuation, so the columns are literally 'n,', 'aspect,', 'rows,',
    # 'avg_set_len:' — later plotting cells index by 'avg_set_len:' and
    # depend on these exact strings.
    stat_keys = 'n, aspect, rows, avg_set_len:'.split()
    method_names = [m.__name__ for m in methods]
    results = {key: [] for key in stat_keys + method_names}
    for n in rows:
        for aspect in aspects:
            # One timing row per (n, aspect) combination.
            data, avg_set_len = rand_data_gen2(n, aspect, silent=True)
            timings = runtime2(methods, data)
            stats = (n, aspect, n // aspect, avg_set_len)
            for key, val in zip(stat_keys, stats):
                results[key].append(val)
            for key, val in zip(method_names, timings):
                results[key].append(val)
    return pd.DataFrame(results)
In [7]:
# Head-to-head timing of all three consolidation implementations on small
# inputs: 16 and 256 rows, aspect ratios 1 and 2.  Bare `results` on the
# last line displays the DataFrame as rich cell output.
methods = [consolidate, consolidate_ramr, consolidate_np]
rows = [2**4, 2**8]
aspects = [1, 2]
results = method_compare(methods, rows, aspects)
results
Out[7]:
In [8]:
# Larger inputs (2**10 and 2**12 rows) over a wide sweep of aspect ratios.
# NOTE(review): consolidate_np is dropped from this comparison — presumably
# too slow at this scale; confirm against the previous cell's timings.
methods = [consolidate, consolidate_ramr]
rows = [2**10, 2**12]
aspects = [2**x for x in range(7)]
results2 = method_compare(methods, rows, aspects)
results2
Out[8]:
In [9]:
# For each row count, time both methods across the aspect ratios and plot
# runtime against average set length.
# NOTE: `res` leaks out of the loop holding only the FINAL n's table
# (n=2**13); the following cells read it.
methods = [consolidate, consolidate_ramr]
rows = [2**x for x in range(10, 14)]
aspects = [1, 4, 16, 64, 256]
for n in rows:
    res = method_compare(methods, [n], aspects)
    res.plot.line(y=['consolidate', 'consolidate_ramr'], x='avg_set_len:', title=f'RUNTIME. n={n} for all aspect changes')
In [10]:
# Show the timing table for the last n from the loop above.
display(res)
In [11]:
# consolidate RUNTIME CHANGES OVER 1-to-256 ASPECT CHANGE
# `res` has one row per aspect in (1, 4, 16, 64, 256), so the 1-to-256
# ratio compares row 0 with row 4.  The original divided by row 1, which
# is the aspect-4 row, contradicting the stated 1-to-256 comparison.
res['consolidate'][0].value / res['consolidate'][4].value
Out[11]:
In [12]:
# consolidate_ramr RUNTIME CHANGES OVER 1-to-256 ASPECT CHANGE
# Row 4 is the aspect-256 row (aspects are 1, 4, 16, 64, 256); the
# original used row 1 (aspect 4), contradicting the stated comparison.
res['consolidate_ramr'][0].value / res['consolidate_ramr'][4].value
Out[12]:
In [13]:
# Scale consolidate_ramr alone up to very large inputs (2**16 and 2**20
# rows), plotting and displaying the table for each n.
# NOTE: as above, `res` leaks from the loop holding only the final n's
# (n=2**20) table, which the next cell reads.
methods = [consolidate_ramr]
rows = [2**x for x in [16, 20]]
aspects = [1, 4, 16, 64, 256]
for n in rows:
    res = method_compare(methods, [n], aspects)
    res.plot.line(y=['consolidate_ramr'], x='avg_set_len:', title=f'RUNTIME. n={n} for all aspect changes')
    display(res)
In [14]:
# consolidate_ramr RUNTIME CHANGES OVER 1-to-256 ASPECT CHANGE
# Row 4 is the aspect-256 row (aspects are 1, 4, 16, 64, 256); the
# original used row 1 (aspect 4), contradicting the stated comparison.
res['consolidate_ramr'][0].value / res['consolidate_ramr'][4].value
Out[14]:
In [ ]: