#!/usr/bin/env python3
"""Preserve a freshly-seeded MinIO data directory.

Run this after seed_minio.py has finished. By default it MOVES --source (the
MinIO --data-dir used during seeding) to --dest (the preservation path) and
writes a README with provenance so future perf runs start from a known
baseline.

Move avoids a ~0.5 TB copy on a full 1000-module seed; Stage B wipes
--minio-data-dir on its next invocation anyway. Pass --copy to keep --source
in place (slower; needs 2x disk).

Typical invocation:

    python preserve_minio_state.py

Defaults map to the paths recommended by PERF_SEED_README.md.
"""

from __future__ import annotations

import argparse
import datetime
import os
import shutil
import stat
import sys
from pathlib import Path


def _rmtree_robust(path: str | os.PathLike[str]) -> None:
    """Remove a directory tree, retrying entries that fail after a chmod.

    This is best-effort: if the retry fails as well, the error is dropped and
    the entry is left on disk. Callers that need the tree to be gone must
    check for themselves afterwards.

    Args:
        path: Root of the directory tree to delete.
    """

    def _retry_writable(func, failed_path, _exc_info) -> None:
        # Read-only files (common on Windows) make the first attempt fail;
        # set the write bit and repeat the exact operation that failed.
        try:
            os.chmod(failed_path, stat.S_IWRITE)
            func(failed_path)
        except OSError:
            pass

    if sys.version_info >= (3, 12):
        # ``onerror`` is deprecated from 3.12 on; ``onexc`` receives the
        # exception instance instead of an exc_info triple, so adapt it.
        shutil.rmtree(
            path,
            onexc=lambda func, p, exc: _retry_writable(
                func, p, (type(exc), exc, exc.__traceback__)
            ),
        )
    else:
        shutil.rmtree(path, onerror=_retry_writable)


def _size_of(path: Path) -> tuple[int, int]:
    """Count the files under *path* and sum their sizes.

    Args:
        path: Directory to walk recursively.

    Returns:
        ``(file_count, total_bytes)``. A file whose ``stat`` fails is still
        counted but contributes 0 bytes.
    """
    files = 0
    total = 0
    for root, _dirs, names in os.walk(path):
        for name in names:
            try:
                total += (Path(root) / name).stat().st_size
            except OSError:
                pass
            files += 1
    return files, total


def _remove_existing(dest: Path) -> bool:
    """Delete whatever currently sits at *dest*; return True once it is gone."""
    if dest.is_dir():
        _rmtree_robust(dest)
    else:
        try:
            dest.unlink()
        except OSError:
            pass  # reported to the caller through the exists() check below
    return not dest.exists()


def main() -> int:
    """Parse the command line, move or copy the data dir, and write a README.

    Returns:
        0 on success.

    Raises:
        SystemExit: If --source is not a directory, if --source and --dest
            are equal or one contains the other, or if an existing --dest
            cannot be removed.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--source",
        default="E:/Dev/zen-perf-seed/minio-data",
        help="Source MinIO data dir (default: E:/Dev/zen-perf-seed/minio-data)",
    )
    parser.add_argument(
        "--dest",
        default="E:/Dev/zen-perf-seed/minio-seeded-packed",
        help="Preservation path (default: E:/Dev/zen-perf-seed/minio-seeded-packed). "
             "Sibling to E:/Dev/zen-perf-seed/minio-seeded-baseline.",
    )
    parser.add_argument(
        "--s3-uri",
        default=os.environ.get("ZEN_PERF_S3_URI", ""),
        help="Source S3 URI recorded in the README (defaults to $ZEN_PERF_S3_URI)",
    )
    parser.add_argument(
        "--bucket",
        default="zen-seed",
        help="MinIO bucket name recorded in the README",
    )
    parser.add_argument(
        "--module-count",
        type=int,
        default=300,
        help="Module count recorded in the README",
    )
    parser.add_argument(
        "--copy",
        action="store_true",
        help="Copy --source to --dest instead of moving it. Default is move "
             "(fast, in-place rename when on the same volume). Use --copy if you "
             "want to keep --source intact for another preserve run.",
    )
    args = parser.parse_args()

    source = Path(args.source).resolve()
    dest = Path(args.dest).resolve()

    if not source.is_dir():
        sys.exit(f"[preserve] source not found: {source}")
    # Dest is wiped and rewritten. Refuse any path that would clobber source.
    if dest == source or dest in source.parents or source in dest.parents:
        sys.exit(f"[preserve] source ({source}) and dest ({dest}) must be disjoint")

    # Measure before moving: afterwards the source no longer exists.
    files, total = _size_of(source)
    mode = "copy" if args.copy else "move"
    print(f"[preserve] source: {source} -> {files:,} files, {total/1024/1024:.1f} MB")
    print(f"[preserve] dest: {dest}")
    print(f"[preserve] mode: {mode}")

    if dest.exists():
        print(f"[preserve] removing existing dest {dest}")
        # Removal is best-effort, so verify it. shutil.move() into a directory
        # that still exists would nest source inside it instead of replacing
        # it, and the README would then describe the wrong tree.
        if not _remove_existing(dest):
            sys.exit(f"[preserve] could not remove existing dest: {dest}")
    dest.parent.mkdir(parents=True, exist_ok=True)

    if args.copy:
        shutil.copytree(source, dest, symlinks=False)
    else:
        shutil.move(str(source), str(dest))

    readme = dest / "README.txt"
    readme.write_text(
        "\n".join([
            "zen-perf-seed preserved MinIO state",
            "",
            f"Created: {datetime.datetime.now(datetime.timezone.utc).isoformat()}",
            f"Source s3 URI: {args.s3_uri}",
            f"Bucket: {args.bucket}",
            f"Modules: {args.module_count}",
            f"Files: {files:,}",
            f"Bytes: {total:,}",
            "",
            "To run a perf iteration: copy this directory onto a fresh MinIO data",
            "dir (see scripts/test_scripts/hub/run_minio_perf.py) and point a hub at it.",
            "",
        ]),
        # UTF-8 is byte-identical to ASCII for ASCII text, but does not raise
        # on a non-ASCII bucket name or URI after the data has already moved.
        encoding="utf-8",
    )
    print(f"[preserve] wrote {readme}")
    return 0


if __name__ == "__main__":
    sys.exit(main())