1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
#!/usr/bin/env python3
"""Preserve a freshly-seeded MinIO data directory.
Run this after seed_minio.py has finished. By default it MOVES --source (the
MinIO --data-dir used during seeding) to --dest (the preservation path) and
writes a README with provenance so future perf runs start from a known
baseline. Move avoids a ~0.5 TB copy on a full 1000-module seed; Stage B
wipes --minio-data-dir on its next invocation anyway.
Pass --copy to keep --source in place (slower; needs 2x disk).
Typical invocation:
python preserve_minio_state.py
Defaults map to the paths recommended by PERF_SEED_README.md.
"""
from __future__ import annotations
import argparse
import datetime
import os
import shutil
import stat
import sys
from pathlib import Path
def _rmtree_robust(path) -> None:
    """Delete the directory tree at *path*, retrying entries that fail.

    When ``shutil.rmtree`` reports a failure for an entry, the entry is made
    writable (``stat.S_IWRITE``) and the failed operation is attempted once
    more, which is what read-only files on Windows need. The retry is
    best-effort: if it fails too, the error is dropped and removal carries
    on, so the tree may still (partly) exist when this function returns.
    """
    def _make_writable_and_retry(func, target, _detail):
        # The third argument is an exc_info tuple (onerror) or an exception
        # instance (onexc); neither is needed for the retry.
        try:
            os.chmod(target, stat.S_IWRITE)
            func(target)
        except Exception:
            pass

    # shutil.rmtree takes the handler as `onexc` from Python 3.12 on and as
    # `onerror` before that.
    handler_kw = "onexc" if sys.version_info >= (3, 12) else "onerror"
    shutil.rmtree(path, **{handler_kw: _make_writable_and_retry})
def _size_of(path: Path) -> tuple[int, int]:
    """Return ``(file_count, total_bytes)`` for the tree rooted at *path*.

    Every file name yielded by ``os.walk`` is counted. A file whose size
    cannot be read (``OSError`` from ``stat``) still counts as a file but
    adds nothing to the byte total.
    """
    file_count = 0
    byte_total = 0
    for dirpath, _subdirs, filenames in os.walk(path):
        file_count += len(filenames)
        for filename in filenames:
            try:
                byte_total += os.stat(os.path.join(dirpath, filename)).st_size
            except OSError:
                # Unreadable entry: keep it in the count, skip its size.
                continue
    return file_count, byte_total
def main() -> int:
    """Move (or copy) a seeded MinIO data dir to its preservation path.

    Parses the command line, measures ``--source``, replaces whatever
    currently exists at ``--dest`` with the source tree (a move by default,
    a copy with ``--copy``) and writes ``README.txt`` with provenance into
    the preserved directory.

    Returns:
        0 on success.

    Raises:
        SystemExit: on invalid arguments, if the source directory is
            missing, if source and dest are equal or nested in one another,
            or if an existing dest cannot be removed.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--source", default="E:/Dev/zen-perf-seed/minio-data",
                        help="Source MinIO data dir (default: E:/Dev/zen-perf-seed/minio-data)")
    parser.add_argument("--dest", default="E:/Dev/zen-perf-seed/minio-seeded-packed",
                        help="Preservation path (default: E:/Dev/zen-perf-seed/minio-seeded-packed). "
                             "Sibling to E:/Dev/zen-perf-seed/minio-seeded-baseline.")
    parser.add_argument("--s3-uri", default=os.environ.get("ZEN_PERF_S3_URI", ""),
                        help="Source S3 URI recorded in the README (defaults to $ZEN_PERF_S3_URI)")
    parser.add_argument("--bucket", default="zen-seed",
                        help="MinIO bucket name recorded in the README")
    parser.add_argument("--module-count", type=int, default=300,
                        help="Module count recorded in the README")
    parser.add_argument("--copy", action="store_true",
                        help="Copy --source to --dest instead of moving it. Default is move "
                             "(fast, in-place rename when on the same volume). Use --copy if you "
                             "want to keep --source intact for another preserve run.")
    args = parser.parse_args()

    source = Path(args.source).resolve()
    dest = Path(args.dest).resolve()
    if not source.is_dir():
        sys.exit(f"[preserve] source not found: {source}")
    # Dest is wiped and rewritten. Refuse any path that would clobber source.
    if dest == source or dest in source.parents or source in dest.parents:
        sys.exit(f"[preserve] source ({source}) and dest ({dest}) must be disjoint")

    # Measure before moving: after a move the source path no longer exists.
    files, total = _size_of(source)
    mode = "copy" if args.copy else "move"
    print(f"[preserve] source: {source} -> {files:,} files, {total/1024/1024:.1f} MB")
    print(f"[preserve] dest: {dest}")
    print(f"[preserve] mode: {mode}")

    # lexists (not exists) so a dangling symlink at dest is also cleared.
    if os.path.lexists(dest):
        print(f"[preserve] removing existing dest {dest}")
        if dest.is_dir() and not dest.is_symlink():
            _rmtree_robust(dest)
        else:
            # rmtree cannot remove a plain file or a symlink.
            try:
                dest.unlink()
            except OSError as exc:
                sys.exit(f"[preserve] cannot remove existing dest {dest}: {exc}")
        # _rmtree_robust is best-effort and may leave dest behind. Stop here:
        # shutil.move would place source *inside* a surviving dest directory
        # and shutil.copytree would fail on it.
        if os.path.lexists(dest):
            sys.exit(f"[preserve] could not remove existing dest {dest}; "
                     "refusing to move/copy into it")

    dest.parent.mkdir(parents=True, exist_ok=True)
    if args.copy:
        shutil.copytree(source, dest, symlinks=False)
    else:
        shutil.move(str(source), str(dest))

    readme = dest / "README.txt"
    readme_lines = [
        "zen-perf-seed preserved MinIO state",
        "",
        f"Created: {datetime.datetime.now(datetime.timezone.utc).isoformat()}",
        f"Source s3 URI: {args.s3_uri}",
        f"Bucket: {args.bucket}",
        f"Modules: {args.module_count}",
        f"Files: {files:,}",
        f"Bytes: {total:,}",
        "",
        "To run a perf iteration: copy this directory onto a fresh MinIO data",
        "dir (see scripts/test_scripts/hub/run_minio_perf.py) and point a hub at it.",
        "",
    ]
    # UTF-8 rather than ASCII: the URI and bucket are user-supplied, and an
    # encoding error here would strike after the data has already been moved.
    # Output is byte-identical to ASCII whenever the values are ASCII.
    readme.write_text("\n".join(readme_lines), encoding="utf-8")
    print(f"[preserve] wrote {readme}")
    return 0
# Script entry point: terminate the process with main()'s return code.
if __name__ == "__main__":
    raise SystemExit(main())
|