aboutsummaryrefslogtreecommitdiff
path: root/thirdparty/ryml/api/python/bm/bm_parse.py
diff options
context:
space:
mode:
authorStefan Boberg <[email protected]>2025-11-07 14:49:13 +0100
committerGitHub Enterprise <[email protected]>2025-11-07 14:49:13 +0100
commit24e43a913f29ac3b314354e8ce5175f135bcc64f (patch)
treeca442937ceeb63461012b33a4576e9835099f106 /thirdparty/ryml/api/python/bm/bm_parse.py
parentget oplog attachments (#622) (diff)
downloadzen-24e43a913f29ac3b314354e8ce5175f135bcc64f.tar.xz
zen-24e43a913f29ac3b314354e8ce5175f135bcc64f.zip
switch to xmake for package management (#611)
This change removes our dependency on vcpkg for package management, in favour of bringing some code in-tree in the `thirdparty` folder as well as using the xmake build-in package management feature. For the latter, all the package definitions are maintained in the zen repo itself, in the `repo` folder. It should now also be easier to build the project as it will no longer depend on having the right version of vcpkg installed, which has been a common problem for new people coming in to the codebase. Now you should only need xmake to build. * Bumps xmake requirement on github runners to 2.9.9 to resolve an issue where xmake on Windows invokes cmake with `v144` toolchain which does not exist * BLAKE3 is now in-tree at `thirdparty/blake3` * cpr is now in-tree at `thirdparty/cpr` * cxxopts is now in-tree at `thirdparty/cxxopts` * fmt is now in-tree at `thirdparty/fmt` * robin-map is now in-tree at `thirdparty/robin-map` * ryml is now in-tree at `thirdparty/ryml` * sol2 is now in-tree at `thirdparty/sol2` * spdlog is now in-tree at `thirdparty/spdlog` * utfcpp is now in-tree at `thirdparty/utfcpp` * xmake package repo definitions is in `repo` * implemented support for sanitizers. ASAN is supported on windows, TSAN, UBSAN, MSAN etc are supported on Linux/MacOS though I have not yet tested it extensively on MacOS * the zencore encryption implementation also now supports using mbedTLS which is used on MacOS, though for now we still use openssl on Linux * crashpad * bumps libcurl to 8.11.0 (from 8.8.0) which should address a rare build upload bug
Diffstat (limited to 'thirdparty/ryml/api/python/bm/bm_parse.py')
-rw-r--r--thirdparty/ryml/api/python/bm/bm_parse.py237
1 files changed, 237 insertions, 0 deletions
diff --git a/thirdparty/ryml/api/python/bm/bm_parse.py b/thirdparty/ryml/api/python/bm/bm_parse.py
new file mode 100644
index 000000000..294be679b
--- /dev/null
+++ b/thirdparty/ryml/api/python/bm/bm_parse.py
@@ -0,0 +1,237 @@
+import ryml
+import ruamel.yaml
+import yaml
+import timeit
+import time
+import copy
+import prettytable
+import os.path
+from collections import OrderedDict as odict
+
+
+def _nodbg(*args, **kwargs):
+ pass
+
+
+def _dbg(*args, **kwargs):
+ print(*args, **kwargs, file=sys.stderr, flush=True)
+
+
+dbg = _dbg
+
+
+class RunResults:
+
+ __slots__ = ('name', 'time_ms', 'count', 'avg', 'MBps')
+
+ def __init__(self, name, time_ms, count, num_bytes):
+ self.name = name
+ self.time_ms = time_ms
+ self.count = count
+ self.avg = time_ms / count
+ num_megabytes = count * num_bytes / 1.0e6
+ num_seconds = time_ms / 1000.0
+ self.MBps = num_megabytes / num_seconds
+
+ def __str__(self):
+ fmt = "{}: count={} time={:.3f}ms avg={:.3f}ms MB/s={:.3f}"
+ fmt = fmt.format(self.name, self.count, self.time_ms, self.avg, self.MBps)
+ return fmt
+
+
+class BmCase:
+
+ def __init__(self, filename):
+ with open(filename, "r") as f:
+ src = f.read()
+ self.filename = filename
+ self.src_as_str = src
+ self.src_as_bytes = bytes(src, "utf8")
+ self.src_as_bytearray = bytearray(src, "utf8")
+ self.src_as_bytearray_orig = copy.copy(self.src_as_bytearray)
+ self.emittree = ryml.parse_in_arena(self.src_as_bytearray)
+ self.emitbuf = bytearray(4 * len(self.src_as_str)) # should be enough
+
+ def run(self, bm_method_name, cls):
+ def run_bm(obj, subject):
+ obj.count = 0
+ t = timeit.Timer(subject)
+ delta = time.time()
+ result = t.autorange() #lambda number, time_taken: time_taken > 1.0)
+ delta = 1000. * (time.time() - delta)
+ return delta, obj.count
+ obj = cls(self)
+ if not hasattr(obj, bm_method_name):
+ return None
+ name = bm_method_name + ":" + cls.__name__
+ dbg(name, "...")
+ method = getattr(obj, bm_method_name)
+ reset_name = 'reset_' + bm_method_name
+ reset_fn = getattr(obj, reset_name, None)
+ def bm_fn():
+ method(self)
+ obj.count += 1
+ if reset_fn is not None:
+ reset_fn(self)
+ delta, count = run_bm(obj, bm_fn)
+ # correct the benchmark to account for the time spent
+ # resetting
+ if reset_fn is not None:
+ # find out how much it takes to reset the bytearray
+ if not hasattr(obj, 'bm_reset_done'):
+ def bm_reset():
+ reset_fn(self)
+ obj.count += 1
+ rdelta, rcount = run_bm(obj, bm_reset)
+ obj.bm_reset_time_per_iteration = rdelta / rcount
+ dbg(name, "reset_time_per_iteration={:.3f}us".format(obj.bm_reset_time_per_iteration * 1000.0))
+ obj.bm_reset_done = True
+ reset_correction = count * obj.bm_reset_time_per_iteration
+ dbg(name, "delta={:.3f}ms".format(delta), "reset_correction={:.3f}ms({:.2f}%)".format(reset_correction, 100.0 * reset_correction / delta))
+ delta -= reset_correction
+ ret = RunResults(name, delta, count, len(self.src_as_str))
+ dbg(name, "ok:", ret)
+ return ret
+
+
+def run(case, benchmarks, approaches):
+ for bm in benchmarks:
+ results = odict()
+ for cls in approaches:
+ r = case.run(bm, cls)
+ if r is None:
+ continue
+ results[r.name] = r
+ table = prettytable.PrettyTable()
+ name = os.path.basename(case.filename)
+ table.field_names = [name, "count", "time(ms)", "avg(ms)", "avg(MB/s)"]
+ table.align[name] = "l"
+ def i(v): return "{:5d}".format(v)
+ def f(v): return "{:8.3f}".format(v)
+ for v in results.values():
+ table.add_row([v.name, i(v.count), f(v.time_ms), f(v.avg), f(v.MBps)])
+ print(table)
+
+
+class BmCaseRun:
+ def __init__(self, case):
+ self.reset_bytearray = False
+
+
+class RymlParseInArena(BmCaseRun):
+
+ def parse(self, case):
+ _ = ryml.parse_in_arena(case.src_as_bytearray)
+
+
+class RymlParseInArenaReuse(BmCaseRun):
+
+ def __init__(self, case):
+ self.tree = ryml.Tree()
+
+ def parse(self, case):
+ ryml.parse_in_arena(case.src_as_bytearray, tree=self.tree)
+
+ def reset_parse(self, case):
+ self.tree.clear()
+ self.tree.clear_arena()
+
+
+class RymlParseInPlace(BmCaseRun):
+
+ def parse(self, case):
+ _ = ryml.parse_in_place(case.src_as_bytearray)
+
+ def reset_parse(self, case):
+ case.src_as_bytearray = copy.copy(case.src_as_bytearray_orig)
+
+
+class RymlParseInPlaceReuse(BmCaseRun):
+
+ def __init__(self, case):
+ self.tree = ryml.Tree()
+
+ def parse(self, case):
+ ryml.parse_in_place(case.src_as_bytearray, tree=self.tree)
+
+ def reset_parse(self, case):
+ self.tree.clear()
+ self.tree.clear_arena()
+ case.src_as_bytearray = copy.copy(case.src_as_bytearray_orig)
+
+
+class RuamelYamlParse(BmCaseRun):
+
+ def parse(self, case):
+ _ = ruamel.yaml.load(case.src_as_str, Loader=ruamel.yaml.Loader)
+
+
+class PyYamlParse(BmCaseRun):
+
+ def parse(self, case):
+ _ = yaml.safe_load(case.src_as_str)
+
+
+class RymlEmitToNewBuffer(BmCaseRun):
+
+ def emit_yaml(self, case):
+ _ = ryml.emit_yaml(case.emittree)
+
+ def emit_json(self, case):
+ _ = ryml.emit_json(case.emittree)
+
+
+class RymlEmitReuse(BmCaseRun):
+
+ def emit_yaml(self, case):
+ _ = ryml.emit_yaml_in_place(case.emittree, case.emitbuf)
+
+ def emit_json(self, case):
+ _ = ryml.emit_json_in_place(case.emittree, case.emitbuf)
+
+
+class RuamelYamlEmit:
+
+ def __init__(self, case):
+ case.ruamel_emittree = ruamel.yaml.load(case.src_as_str, Loader=ruamel.yaml.Loader)
+
+ def emit_yaml(self, case):
+ # https://stackoverflow.com/a/47617341/5875572
+ class MyToStr:
+ def __init__(self, *args, **kwargs):
+ self.s = b""
+ def write(self, s):
+ self.s += s
+ dumper = MyToStr()
+ ruamel.yaml.YAML().dump(case.ruamel_emittree, MyToStr())
+
+
+class PyYamlEmit:
+
+ def __init__(self, case):
+ case.pyyaml_emittree = yaml.load(case.src_as_str, Loader=yaml.Loader)
+
+ def emit_yaml(self, case):
+ _ = yaml.dump(case.pyyaml_emittree)
+
+
+if __name__ == "__main__":
+ import sys
+ if len(sys.argv) < 2:
+ raise Exception("")
+ filename = sys.argv[1]
+ if filename.endswith("outer1000_inner1000.yml"): # this one is too heavy for the Python libs
+ exit(0)
+ case = BmCase(filename)
+ run(case, benchmarks=('parse', ),
+ approaches=(RuamelYamlParse,
+ PyYamlParse,
+ RymlParseInArena,
+ RymlParseInArenaReuse,
+ RymlParseInPlace,
+ RymlParseInPlaceReuse))
+ run(case, benchmarks=('emit_yaml', 'emit_json', ),
+ approaches=(RuamelYamlEmit,
+ PyYamlEmit,
+ RymlEmitToNewBuffer,
+ RymlEmitReuse))