aboutsummaryrefslogtreecommitdiff
path: root/scripts/test_scripts/hub/seed_minio.py
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2026-05-05 14:59:21 +0200
committerGitHub Enterprise <[email protected]>2026-05-05 14:59:21 +0200
commit46f456ffd4d0717a035253ff9076ca6ee664e536 (patch)
tree69d7a9a43b9874fd3990c43aa5ff4135c35d53d9 /scripts/test_scripts/hub/seed_minio.py
parentwatchdog ephemeral port exhaust (#1022) (diff)
downloadarchived-zen-46f456ffd4d0717a035253ff9076ca6ee664e536.tar.xz
archived-zen-46f456ffd4d0717a035253ff9076ca6ee664e536.zip
hub async s3 client (#1024)
- Feature: `AsyncHttpClient` adds cancellable request tokens, streaming GET to a file (`AsyncDownload`), zero-copy chunk-callback GET (`AsyncStream`), pull-mode body source for streaming `AsyncPut`, retry layer mirroring the synchronous client, and a submit-side in-flight cap (`HttpClientSettings::MaxConcurrentRequests`) so hub-scale fanout against a single host cannot stall queued handles into curl's connect-timeout window - Feature: Hub hydration can route S3 transfers through a non-blocking `AsyncHttpClient` (curl_multi + asio) backed by a single io thread; hydrate and dehydrate now pipeline requests instead of blocking worker threads - `--hub-hydration-async-enabled` (Lua: `hub.hydration.async.enabled`, default true) - `--hub-hydration-async-max-concurrent-requests` (Lua: `hub.hydration.async.maxconcurrentrequests`, default `clamp(cpu*4, 128, 512)`) - Feature: Hub provision/deprovision/obliterate now run as two phases on separate worker pools so per-module hydration cannot starve child-process spawn/despawn (and vice versa) - New `--hub-instance-spawn-threads` (Lua: `hub.instance.spawnthreads`, default `clamp(cpu/8, 4, 16)`) drives child-process spawn/despawn - `--hub-instance-provision-threads` (Lua: `hub.instance.provisionthreads`) now drives per-module hydrate/dehydrate scheduling only; default changed from `max(cpu/4, 2)` to `clamp(cpu/8, 4, 12)` - `--hub-hydration-threads` (Lua: `hub.hydration.threads`) now controls per-file workers inside a single hydrate/dehydrate; default changed from `max(cpu/4, 2)` to `clamp(cpu/8, 4, 12)` - Feature: `AsyncHttpClient` owns its `asio::io_context` and one io thread by default; the `(BaseUri, io_context&)` constructor is preserved for callers that want to share an externally-driven `io_context` across clients (caller MUST keep the loop running until the client destructs) - Feature: `Hub::Configuration` C++ struct fields renamed (`OptionalProvisionWorkerPool`/`OptionalHydrationWorkerPool` -> `OptionalProvisionPool`/`OptionalSpawnPool`/`OptionalHydrationPool`). Embedders constructing `Hub` directly must update field names; provision and spawn pools must both be set or both null (asserted at construction). - Bugfix: `S3Client` signing-key cache no longer returns stale signatures after IMDS-rotated credentials change `AccessKeyId`; cache is now keyed on `(DateStamp, AccessKeyId)`
Diffstat (limited to 'scripts/test_scripts/hub/seed_minio.py')
-rw-r--r--scripts/test_scripts/hub/seed_minio.py82
1 files changed, 47 insertions, 35 deletions
diff --git a/scripts/test_scripts/hub/seed_minio.py b/scripts/test_scripts/hub/seed_minio.py
index e0e45c4cb..f5928b995 100644
--- a/scripts/test_scripts/hub/seed_minio.py
+++ b/scripts/test_scripts/hub/seed_minio.py
@@ -1,13 +1,10 @@
#!/usr/bin/env python3
"""Stage B of the perf-seed workflow.
-Replays the snapshot produced by seed_s3_snapshot.py into a single MinIO
-bucket so a later perf run can exercise that bucket. Pack mode is hardcoded
-in this script (see --hub-hydration-enable-pack flag in _start_hub) and
-governs how the hub uploads into MinIO. To compare packed vs unpacked,
-invoke this script twice from two separate worktrees - one with the pack
-flag flipped to false - producing two preserved MinIO data dirs, then run
-run_minio_perf.py against each.
+Replays the snapshot produced by seed_s3_snapshot.py into a MinIO bucket so
+a later perf run can exercise it. Pack mode is fixed ON (the only mode the
+perf-seed pipeline caters to) - the hub is launched with
+--hub-hydration-enable-pack=true so dehydrate emits packed CAS.
Flow:
1. Start a local MinIO server against --minio-data-dir, create the bucket.
@@ -78,7 +75,7 @@ def _find_zenserver(override: Optional[str]) -> Path:
sys.exit(f"zenserver not found at {p}")
return p
script_dir = Path(__file__).resolve().parent
- repo_root = script_dir.parent.parent
+ repo_root = script_dir.parents[2]
for mode in ("release", "debug"):
for plat in (("windows", "x64"), ("linux", "x86_64"), ("macosx", "x86_64")):
p = repo_root / "build" / plat[0] / plat[1] / mode / f"zenserver{_EXE_SUFFIX}"
@@ -189,11 +186,10 @@ def _start_hub(
"--hub-provision-disk-limit-percent=99",
"--hub-provision-memory-limit-percent=80",
f"--hub-instance-limit={instance_limit}",
- # Seeding is not a perf-measurement path - we want it as fast as the
- # host can manage. Let the hub go wide on both provisioning and
- # hydration thread pools rather than matching prod limits.
- "--hub-instance-provision-threads=64",
- "--hub-hydration-threads=64",
+ # Provision / hydration / async-cap use server defaults; on a 128-core
+ # host these resolve to 16 / 16 / 512 which are sized for both the
+ # async hydrate (Stage A) and the sync dehydrate-PUT path that drives
+ # the upload here.
# Prevent the watchdog from auto-deprovisioning modules while we're
# still hydrating the tail / in the overlay phase. BOTH timers have to
# be extended - the provisioned one (default 600s) is what bites on
@@ -203,7 +199,7 @@ def _start_hub(
# Explicit - default is true, but make it obvious that Stage B needs
# it since the final deprovision drives the MinIO upload.
"--hub-enable-dehydration=true",
- # Pack worktree: turn pack on so dehydrate emits packed CAS.
+ # Pack ON (the only seeding mode) so dehydrate emits packed CAS.
"--hub-hydration-enable-pack=true",
] + extra_args
@@ -411,7 +407,16 @@ def _overlay_snapshot(snapshot_root: Path, hub_servers_root: Path, module_ids: l
"""Replace hub_servers_root/<mid>/* with snapshot_root/<mid>/*.
snapshot_root is treated as read-only; only hub_servers_root is written to.
+ Uses ReFS block-clone (`refs_clone.clone_tree`) when source and dest live on
+ a ReFS volume so the overlay is O(1) per file via copy-on-write metadata
+ rather than a full byte copy. Falls back to byte copy per-file when the
+ volume is not ReFS or a file is too small for FSCTL_DUPLICATE_EXTENTS_TO_FILE.
"""
+ from refs_clone import clone_tree, is_refs_volume
+
+ use_clone = is_refs_volume(snapshot_root) and is_refs_volume(hub_servers_root)
+ print(f"[overlay] {'ReFS block-clone path' if use_clone else 'byte-copy path (non-ReFS)'}")
+
files_copied = 0
bytes_copied = 0
modules_overlaid = 0
@@ -424,16 +429,21 @@ def _overlay_snapshot(snapshot_root: Path, hub_servers_root: Path, module_ids: l
continue
if dst.exists():
_rmtree_robust(dst)
- shutil.copytree(src, dst, symlinks=False, dirs_exist_ok=False)
+ if use_clone:
+ f, b = clone_tree(src, dst)
+ files_copied += f
+ bytes_copied += b
+ else:
+ shutil.copytree(src, dst, symlinks=False, dirs_exist_ok=False)
+ for root, _dirs, files in os.walk(dst):
+ for f in files:
+ p = Path(root) / f
+ try:
+ bytes_copied += p.stat().st_size
+ except OSError:
+ pass
+ files_copied += 1
modules_overlaid += 1
- for root, _dirs, files in os.walk(dst):
- for f in files:
- p = Path(root) / f
- try:
- bytes_copied += p.stat().st_size
- except OSError:
- pass
- files_copied += 1
if i % 25 == 0 or i == len(module_ids):
print(f"[overlay] {i}/{len(module_ids)} modules overlaid "
f"({files_copied:,} files, {bytes_copied/1024/1024:.1f} MB)")
@@ -601,21 +611,23 @@ def _seed_one_bucket(
def main() -> int:
parser = argparse.ArgumentParser(description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
- parser.add_argument("--snapshot-dir", default="E:/Dev/zen-perf-seed/s3-snapshot",
- help="Source of per-module server-state trees (READ-ONLY) (default: E:/Dev/zen-perf-seed/s3-snapshot)")
- parser.add_argument("--hub-data-root", default="E:/Dev/zen-perf-seed/hubs",
- help="Each bucket gets its own hub data dir under this root: <root>/hub-b-<bucket>/ "
- "(default: E:/Dev/zen-perf-seed/hubs)")
- parser.add_argument("--minio-data-dir", default="E:/Dev/zen-perf-seed/minio-data",
- help="MinIO data dir shared by every bucket (default: E:/Dev/zen-perf-seed/minio-data)")
+ parser.add_argument("--snapshot-dir", required=True,
+ help="Source of per-module server-state trees (READ-ONLY).")
+ parser.add_argument("--hub-data-root", required=True,
+ help="Each bucket gets its own hub data dir under this root: <root>/hub-b-<bucket>/")
+ parser.add_argument("--minio-data-dir", required=True,
+ help="MinIO data dir shared by every bucket.")
parser.add_argument("--minio-port", type=int, default=9000)
parser.add_argument("--minio-console-port", type=int, default=9001)
parser.add_argument("--hub-port", type=int, default=8558)
parser.add_argument("--bucket", default="zen-seed-packed",
- help="Bucket to seed (default: zen-seed-packed). Pack worktree - "
- "hub is launched with --hub-hydration-enable-pack=true.")
- parser.add_argument("--module-count", type=int, default=0,
- help="Cap on modules processed (0 = all modules in snapshot-dir)")
+ help="Bucket to seed (default: zen-seed-packed). Hub is launched with "
+ "--hub-hydration-enable-pack=true (the only seeding mode).")
+ parser.add_argument("--module-count", type=int, default=1000,
+ help="Cap on modules processed (default 1000, the hub instance shared-state "
+ "limit; raising this above ~1023 will hit 'all slots occupied' errors). "
+ "Pass 0 to process every module under --snapshot-dir but expect failures "
+ "if the snapshot has more than ~1023 entries.")
parser.add_argument("--workers", type=int, default=50)
parser.add_argument("--poll-timeout", type=float, default=1800.0,
help="Max seconds to wait for each state transition (default: 1800)")
@@ -706,7 +718,7 @@ def main() -> int:
for key, lbl in zip(phases, labels):
print(f" {lbl:<22s} {timings.get(key, 0.0):>8.1f}")
- print(f"[summary] next: preserve {minio_data_dir} to E:/Dev/zen-perf-seed/minio-seeded/")
+ print(f"[summary] next: preserve {minio_data_dir} via preserve_minio_state.py --source <this> --dest <baseline>")
finally:
if minio_proc is not None and minio_proc.poll() is None: