diff options
| author | Stefan Boberg <[email protected]> | 2026-02-23 11:19:52 +0100 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2026-02-23 11:19:52 +0100 |
| commit | 9aac0fd369b87e965fb34b5168646387de7ea1cd (patch) | |
| tree | 367a820685a829adbab31cd1374b1af2cece4b7e /thirdparty/ryml/api/python/bm/bm_parse.py | |
| parent | changed command names and descriptions to use class members instead of string... (diff) | |
| download | zen-9aac0fd369b87e965fb34b5168646387de7ea1cd.tar.xz zen-9aac0fd369b87e965fb34b5168646387de7ea1cd.zip | |
implement yaml generation (#774)
This implements a YAML generation strategy similar to the JSON generation, where we just build a string instead of building a ryml tree.
This also removes the dependency on ryml, which reduces binary size and build times.
Diffstat (limited to 'thirdparty/ryml/api/python/bm/bm_parse.py')
| -rw-r--r-- | thirdparty/ryml/api/python/bm/bm_parse.py | 237 |
1 files changed, 0 insertions, 237 deletions
"""Benchmark YAML parsing and emitting with ryml, ruamel.yaml and PyYAML.

Recovered from thirdparty/ryml/api/python/bm/bm_parse.py.

Usage: ``python bm_parse.py <yaml-file>``

For each benchmark name (``parse``, ``emit_yaml``, ``emit_json``) every
"approach" class that defines a method of that name is timed with
``timeit.Timer.autorange`` and the results are printed as a table.
"""

import copy
import os.path
import sys
import time
import timeit
from collections import OrderedDict as odict

import prettytable
import ruamel.yaml
import ryml
import yaml


def _nodbg(*args, **kwargs):
    """Discard a debug message (drop-in replacement for ``_dbg``)."""
    pass


def _dbg(*args, **kwargs):
    """Print a debug message to stderr, flushing immediately."""
    print(*args, **kwargs, file=sys.stderr, flush=True)


# Rebind to ``_nodbg`` to silence progress output.
dbg = _dbg


class RunResults:
    """Timing summary for one benchmark/approach pair.

    Attributes:
        name: label of the run (``"<benchmark>:<class name>"``).
        time_ms: total measured time in milliseconds.
        count: number of timed iterations.
        avg: average time per iteration in milliseconds.
        MBps: throughput in megabytes (1e6 bytes) per second.
    """

    __slots__ = ('name', 'time_ms', 'count', 'avg', 'MBps')

    def __init__(self, name, time_ms, count, num_bytes):
        self.name = name
        self.time_ms = time_ms
        self.count = count
        # Guard the divisions so a degenerate measurement cannot crash the
        # report (e.g. a zero elapsed time after the reset correction).
        self.avg = time_ms / count if count else 0.0
        num_megabytes = count * num_bytes / 1.0e6
        num_seconds = time_ms / 1000.0
        self.MBps = num_megabytes / num_seconds if num_seconds else float("inf")

    def __str__(self):
        fmt = "{}: count={} time={:.3f}ms avg={:.3f}ms MB/s={:.3f}"
        return fmt.format(self.name, self.count, self.time_ms, self.avg, self.MBps)


def _time_calls(obj, subject):
    """Time ``subject`` with ``timeit`` autorange.

    ``subject`` is expected to increment ``obj.count`` on every call, so the
    returned count covers every invocation made by autorange, matching the
    wall-clock interval measured around it.

    Returns:
        ``(elapsed_ms, call_count)``.
    """
    obj.count = 0
    timer = timeit.Timer(subject)
    # perf_counter is monotonic and high-resolution, unlike time.time().
    start = time.perf_counter()
    timer.autorange()
    elapsed_ms = 1000.0 * (time.perf_counter() - start)
    return elapsed_ms, obj.count


class BmCase:
    """A benchmark input file, loaded once in every form the approaches need."""

    def __init__(self, filename):
        # The bytes below are built as UTF-8, so read the text as UTF-8 too
        # instead of relying on the platform default encoding.
        with open(filename, "r", encoding="utf-8") as f:
            src = f.read()
        self.filename = filename
        self.src_as_str = src
        self.src_as_bytes = bytes(src, "utf8")
        self.src_as_bytearray = bytearray(src, "utf8")
        # Pristine copy used by the reset_* hooks to restore the buffer that
        # in-place parsing works on.
        self.src_as_bytearray_orig = copy.copy(self.src_as_bytearray)
        self.emittree = ryml.parse_in_arena(self.src_as_bytearray)
        self.emitbuf = bytearray(4 * len(self.src_as_str))  # should be enough

    def run(self, bm_method_name, cls):
        """Benchmark ``cls.<bm_method_name>`` against this case.

        Args:
            bm_method_name: name of the benchmark method (e.g. ``"parse"``).
            cls: approach class; it is instantiated with this case.

        Returns:
            A ``RunResults``, or ``None`` when ``cls`` does not define
            ``bm_method_name``.
        """
        # Check the class before instantiating: some approaches do expensive
        # work in __init__ (e.g. a full ruamel parse) that would be wasted
        # when the benchmark does not apply to them.
        if not hasattr(cls, bm_method_name):
            return None
        obj = cls(self)
        name = bm_method_name + ":" + cls.__name__
        dbg(name, "...")
        method = getattr(obj, bm_method_name)
        reset_name = 'reset_' + bm_method_name
        reset_fn = getattr(obj, reset_name, None)

        def bm_fn():
            method(self)
            obj.count += 1
            if reset_fn is not None:
                reset_fn(self)

        delta, count = _time_calls(obj, bm_fn)
        # correct the benchmark to account for the time spent
        # resetting
        if reset_fn is not None:
            # find out how much it takes to reset the bytearray
            if not hasattr(obj, 'bm_reset_done'):
                def bm_reset():
                    reset_fn(self)
                    obj.count += 1

                rdelta, rcount = _time_calls(obj, bm_reset)
                obj.bm_reset_time_per_iteration = rdelta / rcount
                dbg(name, "reset_time_per_iteration={:.3f}us".format(obj.bm_reset_time_per_iteration * 1000.0))
                obj.bm_reset_done = True
            reset_correction = count * obj.bm_reset_time_per_iteration
            dbg(name, "delta={:.3f}ms".format(delta), "reset_correction={:.3f}ms({:.2f}%)".format(reset_correction, 100.0 * reset_correction / delta))
            delta -= reset_correction
        # Throughput is reported in bytes, so use the encoded length rather
        # than the character count (they differ for non-ASCII input).
        ret = RunResults(name, delta, count, len(self.src_as_bytes))
        dbg(name, "ok:", ret)
        return ret


def _fmt_int(v):
    """Format an iteration count for the results table."""
    return "{:5d}".format(v)


def _fmt_float(v):
    """Format a timing/throughput value for the results table."""
    return "{:8.3f}".format(v)


def run(case, benchmarks, approaches):
    """Run every benchmark on every applicable approach; print one table each.

    Args:
        case: the ``BmCase`` to benchmark.
        benchmarks: iterable of benchmark method names.
        approaches: iterable of approach classes.
    """
    for bm in benchmarks:
        results = odict()
        for cls in approaches:
            r = case.run(bm, cls)
            if r is None:
                continue
            results[r.name] = r
        table = prettytable.PrettyTable()
        name = os.path.basename(case.filename)
        table.field_names = [name, "count", "time(ms)", "avg(ms)", "avg(MB/s)"]
        table.align[name] = "l"
        for v in results.values():
            table.add_row([v.name, _fmt_int(v.count), _fmt_float(v.time_ms),
                           _fmt_float(v.avg), _fmt_float(v.MBps)])
        print(table)


class BmCaseRun:
    """Base class for approaches; one instance is created per benchmark run."""

    def __init__(self, case):
        self.reset_bytearray = False


class RymlParseInArena(BmCaseRun):
    """ryml: parse into a fresh tree, copying the source into its arena."""

    def parse(self, case):
        _ = ryml.parse_in_arena(case.src_as_bytearray)


class RymlParseInArenaReuse(BmCaseRun):
    """ryml: parse in arena, reusing a single tree across iterations."""

    def __init__(self, case):
        super().__init__(case)
        self.tree = ryml.Tree()

    def parse(self, case):
        ryml.parse_in_arena(case.src_as_bytearray, tree=self.tree)

    def reset_parse(self, case):
        self.tree.clear()
        self.tree.clear_arena()


class RymlParseInPlace(BmCaseRun):
    """ryml: parse in place into a fresh tree."""

    def parse(self, case):
        _ = ryml.parse_in_place(case.src_as_bytearray)

    def reset_parse(self, case):
        # Restore the source buffer from the pristine copy before the next
        # iteration.
        case.src_as_bytearray = copy.copy(case.src_as_bytearray_orig)


class RymlParseInPlaceReuse(BmCaseRun):
    """ryml: parse in place, reusing a single tree across iterations."""

    def __init__(self, case):
        super().__init__(case)
        self.tree = ryml.Tree()

    def parse(self, case):
        ryml.parse_in_place(case.src_as_bytearray, tree=self.tree)

    def reset_parse(self, case):
        self.tree.clear()
        self.tree.clear_arena()
        case.src_as_bytearray = copy.copy(case.src_as_bytearray_orig)


class RuamelYamlParse(BmCaseRun):
    """ruamel.yaml: parse from ``str``."""

    def parse(self, case):
        # NOTE(review): full (unsafe) Loader -- only run on trusted benchmark
        # files. The module-level ruamel.yaml.load() is reportedly deprecated
        # in newer ruamel.yaml releases; confirm against the pinned version.
        _ = ruamel.yaml.load(case.src_as_str, Loader=ruamel.yaml.Loader)


class PyYamlParse(BmCaseRun):
    """PyYAML: parse from ``str`` with ``safe_load``."""

    def parse(self, case):
        _ = yaml.safe_load(case.src_as_str)


class RymlEmitToNewBuffer(BmCaseRun):
    """ryml: emit the pre-parsed tree without supplying an output buffer."""

    def emit_yaml(self, case):
        _ = ryml.emit_yaml(case.emittree)

    def emit_json(self, case):
        _ = ryml.emit_json(case.emittree)


class RymlEmitReuse(BmCaseRun):
    """ryml: emit the pre-parsed tree into the preallocated ``case.emitbuf``."""

    def emit_yaml(self, case):
        _ = ryml.emit_yaml_in_place(case.emittree, case.emitbuf)

    def emit_json(self, case):
        _ = ryml.emit_json_in_place(case.emittree, case.emitbuf)


class _ChunkSink:
    """Minimal write-only stream that collects whatever is written to it.

    Chunks are appended to a list instead of being concatenated, so the sink
    itself adds no quadratic cost to the benchmark being measured.
    See https://stackoverflow.com/a/47617341/5875572
    """

    def __init__(self, *args, **kwargs):
        self.chunks = []

    def write(self, s):
        self.chunks.append(s)


class RuamelYamlEmit(BmCaseRun):
    """ruamel.yaml: dump a pre-parsed document into an in-memory sink."""

    def __init__(self, case):
        super().__init__(case)
        # NOTE(review): full (unsafe) Loader -- only run on trusted benchmark
        # files; see the note on RuamelYamlParse about ruamel.yaml.load().
        case.ruamel_emittree = ruamel.yaml.load(case.src_as_str, Loader=ruamel.yaml.Loader)

    def emit_yaml(self, case):
        sink = _ChunkSink()
        ruamel.yaml.YAML().dump(case.ruamel_emittree, sink)


class PyYamlEmit(BmCaseRun):
    """PyYAML: dump a pre-parsed document to a string."""

    def __init__(self, case):
        super().__init__(case)
        # NOTE(review): yaml.Loader can construct arbitrary Python objects --
        # only run on trusted benchmark files.
        case.pyyaml_emittree = yaml.load(case.src_as_str, Loader=yaml.Loader)

    def emit_yaml(self, case):
        _ = yaml.dump(case.pyyaml_emittree)


def _main(argv):
    """Script entry point: benchmark the YAML file named in ``argv[1]``."""
    if len(argv) < 2:
        sys.exit("usage: {} <yaml-file>".format(argv[0] if argv else "bm_parse.py"))
    filename = argv[1]
    if filename.endswith("outer1000_inner1000.yml"):  # this one is too heavy for the Python libs
        sys.exit(0)
    case = BmCase(filename)
    run(case, benchmarks=('parse', ),
        approaches=(RuamelYamlParse,
                    PyYamlParse,
                    RymlParseInArena,
                    RymlParseInArenaReuse,
                    RymlParseInPlace,
                    RymlParseInPlaceReuse))
    run(case, benchmarks=('emit_yaml', 'emit_json', ),
        approaches=(RuamelYamlEmit,
                    PyYamlEmit,
                    RymlEmitToNewBuffer,
                    RymlEmitReuse))


if __name__ == "__main__":
    _main(sys.argv)