aboutsummaryrefslogtreecommitdiff
path: root/scripts/test_scripts/hub/preserve_minio_state.py
blob: 365e4a542ffae3dc302c246674bb929c27a2b226 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/usr/bin/env python3
"""Preserve a freshly-seeded MinIO data directory.

Run this after seed_minio.py has finished. By default it MOVES --source (the
MinIO --data-dir used during seeding) to --dest (the preservation path) and
writes a README with provenance so future perf runs start from a known
baseline. Move avoids a ~0.5 TB copy on a full 1000-module seed; Stage B
wipes --minio-data-dir on its next invocation anyway.

Pass --copy to keep --source in place (slower; needs 2x disk).

Typical invocation:
  python preserve_minio_state.py

Defaults map to the paths recommended by PERF_SEED_README.md.
"""

from __future__ import annotations

import argparse
import datetime
import os
import shutil
import stat
import sys
from pathlib import Path


def _rmtree_robust(path) -> None:
    """Remove the tree at *path*, retrying entries that fail to delete.

    When shutil.rmtree reports a failure for an entry, the entry is made
    writable (clears the read-only attribute on Windows) and the failed
    operation is retried once. Removal is best-effort: if the retry fails
    too, the error is swallowed and the entry is left behind.
    """
    def _make_writable_and_retry(failed_op, target, _exc_info):
        # Best-effort by design: any failure here is deliberately ignored.
        try:
            os.chmod(target, stat.S_IWRITE)
            failed_op(target)
        except Exception:
            pass

    if sys.version_info < (3, 12):
        # Older interpreters only offer the exc_info-style hook.
        shutil.rmtree(path, onerror=_make_writable_and_retry)
        return

    def _onexc_adapter(failed_op, target, exc):
        # onexc hands over the exception instance; rebuild the
        # (type, value, traceback) triple the shared handler expects.
        _make_writable_and_retry(
            failed_op, target, (type(exc), exc, exc.__traceback__)
        )

    shutil.rmtree(path, onexc=_onexc_adapter)


def _size_of(path: Path) -> tuple[int, int]:
    """Return ``(file_count, total_bytes)`` for every file under *path*.

    Files whose size cannot be read (stat raises OSError) are still counted
    but contribute zero bytes to the total.
    """
    file_count = 0
    byte_total = 0
    for dirpath, _subdirs, filenames in os.walk(path):
        directory = Path(dirpath)
        # Every listed name counts as a file, whether or not stat succeeds.
        file_count += len(filenames)
        for filename in filenames:
            try:
                byte_total += (directory / filename).stat().st_size
            except OSError:
                continue
    return file_count, byte_total


def main() -> int:
    """Move (or copy) a seeded MinIO data dir to its preservation path.

    Parses the command line, validates that --source is a directory and that
    --source and --dest are disjoint, replaces any existing --dest with the
    contents of --source (move by default, copy with --copy), then writes a
    README.txt with provenance into --dest.

    Returns:
        0 on success.

    Raises:
        SystemExit: with a ``[preserve]`` message when --source is missing,
            when the two paths overlap, or when an existing --dest could not
            be removed.
    """
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("--source", default="E:/Dev/zen-perf-seed/minio-data",
                        help="Source MinIO data dir (default: E:/Dev/zen-perf-seed/minio-data)")
    parser.add_argument("--dest", default="E:/Dev/zen-perf-seed/minio-seeded-packed",
                        help="Preservation path (default: E:/Dev/zen-perf-seed/minio-seeded-packed). "
                             "Sibling to E:/Dev/zen-perf-seed/minio-seeded-baseline.")
    parser.add_argument("--s3-uri", default=os.environ.get("ZEN_PERF_S3_URI", ""),
                        help="Source S3 URI recorded in the README (defaults to $ZEN_PERF_S3_URI)")
    parser.add_argument("--bucket", default="zen-seed",
                        help="MinIO bucket name recorded in the README")
    parser.add_argument("--module-count", type=int, default=300,
                        help="Module count recorded in the README")
    parser.add_argument("--copy", action="store_true",
                        help="Copy --source to --dest instead of moving it. Default is move "
                             "(fast, in-place rename when on the same volume). Use --copy if you "
                             "want to keep --source intact for another preserve run.")
    args = parser.parse_args()

    source = Path(args.source).resolve()
    dest = Path(args.dest).resolve()

    if not source.is_dir():
        sys.exit(f"[preserve] source not found: {source}")

    # Dest is wiped and rewritten. Refuse any path that would clobber source.
    if dest == source or dest in source.parents or source in dest.parents:
        sys.exit(f"[preserve] source ({source}) and dest ({dest}) must be disjoint")

    # Measure before moving: after a move the source path no longer exists.
    files, total = _size_of(source)
    mode = "copy" if args.copy else "move"
    print(f"[preserve] source: {source} -> {files:,} files, {total/1024/1024:.1f} MB")
    print(f"[preserve] dest:   {dest}")
    print(f"[preserve] mode:   {mode}")

    if dest.exists():
        print(f"[preserve] removing existing dest {dest}")
        _rmtree_robust(dest)
        # _rmtree_robust is best-effort and swallows failures. If anything is
        # left behind, shutil.move would nest source *inside* the stale dest
        # (and copytree would raise), so stop before touching source.
        if dest.exists():
            sys.exit(f"[preserve] could not remove existing dest: {dest}")

    dest.parent.mkdir(parents=True, exist_ok=True)
    if args.copy:
        shutil.copytree(source, dest, symlinks=False)
    else:
        shutil.move(str(source), str(dest))

    readme = dest / "README.txt"
    readme.write_text(
        "\n".join([
            "zen-perf-seed preserved MinIO state",
            "",
            f"Created:       {datetime.datetime.now(datetime.timezone.utc).isoformat()}",
            f"Source s3 URI: {args.s3_uri}",
            f"Bucket:        {args.bucket}",
            f"Modules:       {args.module_count}",
            f"Files:         {files:,}",
            f"Bytes:         {total:,}",
            "",
            "To run a perf iteration: copy this directory onto a fresh MinIO data",
            "dir (see scripts/test_scripts/hub/run_minio_perf.py) and point a hub at it.",
            "",
        ]),
        encoding="ascii",
        # The data has already been moved by now; a non-ASCII --s3-uri or
        # --bucket must not abort the run and leave dest without a README.
        errors="backslashreplace",
    )
    print(f"[preserve] wrote {readme}")
    return 0


if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the process exit status.
    raise SystemExit(main())