diff options
| author | Dan Engelbrecht <[email protected]> | 2026-04-08 13:51:46 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2026-04-08 13:51:46 +0200 |
| commit | 289d66d7b54f0560253a2a4eb27bf697ad62fa83 (patch) | |
| tree | 59de8d951575c9ef6cbf597fce061ddc4ce71ef1 | |
| parent | sort items on dashboard (#924) (diff) | |
| download | zen-289d66d7b54f0560253a2a4eb27bf697ad62fa83.tar.xz zen-289d66d7b54f0560253a2a4eb27bf697ad62fa83.zip | |
hydration data obliteration (#923)
- Feature: Hub obliterate operation deletes all local and backend hydration data for a module
- Improvement: Hub dashboard adds obliterate button for individual, bulk, and by-name module deletion
| -rw-r--r-- | CHANGELOG.md | 2 | ||||
| -rw-r--r-- | VERSION.txt | 2 | ||||
| -rw-r--r-- | docs/hub.md | 23 | ||||
| -rw-r--r-- | src/zenserver-test/hub-tests.cpp | 5 | ||||
| -rw-r--r-- | src/zenserver/frontend/html/pages/hub.js | 123 | ||||
| -rw-r--r-- | src/zenserver/frontend/html/zen.css | 19 | ||||
| -rw-r--r-- | src/zenserver/hub/README.md | 17 | ||||
| -rw-r--r-- | src/zenserver/hub/httphubservice.cpp | 39 | ||||
| -rw-r--r-- | src/zenserver/hub/hub.cpp | 350 | ||||
| -rw-r--r-- | src/zenserver/hub/hub.h | 25 | ||||
| -rw-r--r-- | src/zenserver/hub/hubinstancestate.cpp | 2 | ||||
| -rw-r--r-- | src/zenserver/hub/hubinstancestate.h | 3 | ||||
| -rw-r--r-- | src/zenserver/hub/hydration.cpp | 191 | ||||
| -rw-r--r-- | src/zenserver/hub/hydration.h | 3 | ||||
| -rw-r--r-- | src/zenserver/hub/storageserverinstance.cpp | 71 | ||||
| -rw-r--r-- | src/zenserver/hub/storageserverinstance.h | 16 |
16 files changed, 738 insertions, 153 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 2209fe66a..bbc813365 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ - Feature: Configurable thread pools for hub instance provisioning and hydration `--hub-instance-provision-threads` defaults to `max(cpu_count / 4, 2)`. Set to 0 for synchronous operation. `--hub-hydration-threads` defaults to `max(cpu_count / 4, 2)`. Set to 0 for synchronous operation. +- Feature: Hub obliterate operation deletes all local and backend hydration data for a module - Improvement: Hub triggers GC on instance before deprovisioning to compact storage before dehydration - Improvement: GC status now reports pending triggers as running - Improvement: S3 client debug logging gated behind verbose mode to reduce log noise at default verbosity @@ -15,6 +16,7 @@ - Improvement: File modification tick on Linux/macOS now uses sub-second precision (100ns), matching Windows FILETIME granularity - Improvement: Dashboard front page now paginates Cooked Projects and Cache namespace lists - Improvement: Cache and Projects pages now paginate and sort lists consistently with the front page +- Improvement: Hub dashboard adds obliterate button for individual, bulk, and by-name module deletion - Bugfix: Fixed hub Consul health check registering with `/hub/health` endpoint which does not exist; now uses `/health` - Bugfix: Hub dashboard Resources tile was missing total disk space diff --git a/VERSION.txt b/VERSION.txt index 7bce94b08..afa2afe0b 100644 --- a/VERSION.txt +++ b/VERSION.txt @@ -1 +1 @@ -5.8.3-pre0 +5.8.3-pre1 diff --git a/docs/hub.md b/docs/hub.md index 52a6fa3be..78d05f9b9 100644 --- a/docs/hub.md +++ b/docs/hub.md @@ -30,18 +30,23 @@ stateDiagram-v2 Provisioning --> Unprovisioned : failed Provisioned --> Hibernating : Hibernate Provisioned --> Deprovisioning : Deprovision / timeout + Provisioned --> Obliterating : Obliterate Provisioned --> Crashed : process exited Hibernating --> Hibernated : stopped Hibernating --> Provisioned : failed Hibernated --> Waking : Wake Hibernated --> Deprovisioning : Deprovision / timeout + Hibernated --> Obliterating : Obliterate Waking --> Provisioned : ready Waking --> Hibernated : failed Deprovisioning --> Unprovisioned : done Crashed --> Recovering : watchdog Crashed --> Deprovisioning : Deprovision + Crashed --> Obliterating : Obliterate Recovering --> Provisioned : success Recovering --> Unprovisioned : failed + Obliterating --> Unprovisioned : done + Obliterating --> Crashed : failed ``` **Stable states:** @@ -56,9 +61,9 @@ stateDiagram-v2 - **Crashed** - process exited unexpectedly. The watchdog attempts an in-place restart automatically. -Transitioning states (`Provisioning`, `Hibernating`, `Waking`, `Deprovisioning`, `Recovering`) -are transient and held exclusively by one operation at a time. If hibernation fails (the -process cannot be stopped cleanly), the instance remains Provisioned. +Transitioning states (`Provisioning`, `Hibernating`, `Waking`, `Deprovisioning`, `Recovering`, +`Obliterating`) are transient and held exclusively by one operation at a time. If hibernation +fails (the process cannot be stopped cleanly), the instance remains Provisioned. **Hibernation vs deprovision:** hibernating stops the process but keeps the data directory intact, allowing fast restart on the next Wake. Deprovisioning triggers a GC cycle, then @@ -66,9 +71,17 @@ dehydrates the instance's state back to the configured backend, then deletes all instance data. Explicit deprovision requests are always honoured; the watchdog timeout path always deprovisions rather than hibernates. +**Obliterate vs deprovision:** deprovisioning preserves data on the hydration backend so the +next provision of the same module starts warm. Obliterate permanently destroys both local +instance data and all backend hydration data for the module. This is irreversible. Obliterate +can be called on Provisioned, Hibernated, or Crashed instances. It also works on modules that +are not currently tracked by the hub (already deprovisioned) -- in that case it deletes only +the backend hydration data. Obliterating a module that was never provisioned is a no-op +success. + **Idempotent operations:** hibernating an already-hibernated instance, waking an -already-provisioned instance, or deprovisioning a non-existent module all return success -without side effects. +already-provisioned instance, deprovisioning a non-existent module, or obliterating a +never-provisioned module all return success without side effects. ## The Watchdog diff --git a/src/zenserver-test/hub-tests.cpp b/src/zenserver-test/hub-tests.cpp index 82dfd7e91..487e22b4b 100644 --- a/src/zenserver-test/hub-tests.cpp +++ b/src/zenserver-test/hub-tests.cpp @@ -762,9 +762,10 @@ TEST_CASE("hub.hibernate.errors") CHECK(!Result); CHECK_EQ(Result.StatusCode, HttpResponseCode::NotFound); + // Obliterate of an unknown module succeeds (cleans up backend data for dehydrated modules) Result = Client.Delete("modules/unknown"); - CHECK(!Result); - CHECK_EQ(Result.StatusCode, HttpResponseCode::NotFound); + CHECK(Result); + CHECK_EQ(Result.StatusCode, HttpResponseCode::Accepted); // Double-provision: second call while first is in-flight returns 202 Accepted with the same port. Result = Client.Post("modules/errmod/provision"); diff --git a/src/zenserver/frontend/html/pages/hub.js b/src/zenserver/frontend/html/pages/hub.js index 1b84e46ec..7ae1deb5c 100644 --- a/src/zenserver/frontend/html/pages/hub.js +++ b/src/zenserver/frontend/html/pages/hub.js @@ -20,6 +20,7 @@ function _btn_enabled(state, action) if (action === "hibernate") { return state === "provisioned"; } if (action === "wake") { return state === "hibernated"; } if (action === "deprovision") { return _is_actionable(state); } + if (action === "obliterate") { return _is_actionable(state); } return false; } @@ -96,20 +97,24 @@ export class Page extends ZenPage this._bulk_label.className = "module-bulk-label"; this._btn_bulk_hibernate = _make_bulk_btn("\u23F8", "Hibernate", () => this._exec_action("hibernate", [...this._selected])); this._btn_bulk_wake = _make_bulk_btn("\u25B6", "Wake", () => this._exec_action("wake", [...this._selected])); - this._btn_bulk_deprov = _make_bulk_btn("\u2715", "Deprovision",() => this._confirm_deprovision([...this._selected])); + this._btn_bulk_deprov = _make_bulk_btn("\u23F9", "Deprovision",() => this._confirm_deprovision([...this._selected])); + this._btn_bulk_oblit = _make_bulk_btn("\uD83D\uDD25", "Obliterate", () => this._confirm_obliterate([...this._selected])); const bulk_sep = document.createElement("div"); bulk_sep.className = "module-bulk-sep"; this._btn_hibernate_all = _make_bulk_btn("\u23F8", "Hibernate All", () => this._confirm_all("hibernate", "Hibernate All")); this._btn_wake_all = _make_bulk_btn("\u25B6", "Wake All", () => this._confirm_all("wake", "Wake All")); - this._btn_deprov_all = _make_bulk_btn("\u2715", "Deprovision All",() => this._confirm_all("deprovision", "Deprovision All")); + this._btn_deprov_all = _make_bulk_btn("\u23F9", "Deprovision All",() => this._confirm_all("deprovision", "Deprovision All")); + this._btn_oblit_all = _make_bulk_btn("\uD83D\uDD25", "Obliterate All", () => this._confirm_obliterate(this._modules_data.map(m => m.moduleId))); this._bulk_bar.appendChild(this._bulk_label); this._bulk_bar.appendChild(this._btn_bulk_hibernate); this._bulk_bar.appendChild(this._btn_bulk_wake); this._bulk_bar.appendChild(this._btn_bulk_deprov); + this._bulk_bar.appendChild(this._btn_bulk_oblit); this._bulk_bar.appendChild(bulk_sep); this._bulk_bar.appendChild(this._btn_hibernate_all); this._bulk_bar.appendChild(this._btn_wake_all); this._bulk_bar.appendChild(this._btn_deprov_all); + this._bulk_bar.appendChild(this._btn_oblit_all); mod_host.appendChild(this._bulk_bar); // Module table @@ -153,7 +158,9 @@ export class Page extends ZenPage this._btn_next.textContent = "Next \u2192"; this._btn_next.addEventListener("click", () => this._go_page(this._page + 1)); this._btn_provision = _make_bulk_btn("+", "Provision", () => this._show_provision_modal()); + this._btn_obliterate = _make_bulk_btn("\uD83D\uDD25", "Obliterate", () => this._show_obliterate_modal()); pager.appendChild(this._btn_provision); + pager.appendChild(this._btn_obliterate); pager.appendChild(this._btn_prev); pager.appendChild(this._pager_label); pager.appendChild(this._btn_next); @@ -297,7 +304,7 @@ export class Page extends ZenPage row.idx.textContent = i + 1; row.cb.checked = this._selected.has(id); row.dot.setAttribute("data-state", state); - if (state === "deprovisioning") + if (state === "deprovisioning" || state === "obliterating") { row.dot.setAttribute("data-prev-state", prev); } @@ -319,6 +326,7 @@ export class Page extends ZenPage row.btn_hibernate.disabled = !_btn_enabled(state, "hibernate"); row.btn_wake.disabled = !_btn_enabled(state, "wake"); row.btn_deprov.disabled = !_btn_enabled(state, "deprovision"); + row.btn_oblit.disabled = !_btn_enabled(state, "obliterate"); if (m.process_metrics) { @@ -385,7 +393,7 @@ export class Page extends ZenPage const dot = document.createElement("span"); dot.className = "module-state-dot"; dot.setAttribute("data-state", state); - if (state === "deprovisioning") + if (state === "deprovisioning" || state === "obliterating") { dot.setAttribute("data-prev-state", prev); } @@ -409,13 +417,16 @@ export class Page extends ZenPage btn_o.disabled = state !== "provisioned"; const [wrap_h, btn_h] = _make_action_btn("\u23F8", "Hibernate", () => this._post_module_action(id, "hibernate").then(() => this._update())); const [wrap_w, btn_w] = _make_action_btn("\u25B6", "Wake", () => this._post_module_action(id, "wake").then(() => this._update())); - const [wrap_d, btn_d] = _make_action_btn("\u2715", "Deprovision", () => this._confirm_deprovision([id])); + const [wrap_d, btn_d] = _make_action_btn("\u23F9", "Deprovision", () => this._confirm_deprovision([id])); + const [wrap_x, btn_x] = _make_action_btn("\uD83D\uDD25", "Obliterate", () => this._confirm_obliterate([id])); btn_h.disabled = !_btn_enabled(state, "hibernate"); btn_w.disabled = !_btn_enabled(state, "wake"); btn_d.disabled = !_btn_enabled(state, "deprovision"); + btn_x.disabled = !_btn_enabled(state, "obliterate"); td_action.appendChild(wrap_h); td_action.appendChild(wrap_w); td_action.appendChild(wrap_d); + td_action.appendChild(wrap_x); td_action.appendChild(wrap_o); tr.appendChild(td_action); @@ -476,7 +487,7 @@ export class Page extends ZenPage metrics_td.appendChild(metrics_grid); metrics_tr.appendChild(metrics_td); - row = { tr, metrics_tr, idx: td_idx, cb, dot, state_text: state_node, port_text: port_node, btn_expand, btn_open: btn_o, btn_hibernate: btn_h, btn_wake: btn_w, btn_deprov: btn_d, metric_nodes, state_since_node, state_age_node, state_since_label, state_age_label }; + row = { tr, metrics_tr, idx: td_idx, cb, dot, state_text: state_node, port_text: port_node, btn_expand, btn_open: btn_o, btn_hibernate: btn_h, btn_wake: btn_w, btn_deprov: btn_d, btn_oblit: btn_x, metric_nodes, state_since_node, state_age_node, state_since_label, state_age_label }; this._row_cache.set(id, row); } @@ -586,6 +597,7 @@ export class Page extends ZenPage this._btn_bulk_hibernate.disabled = !this._all_selected_in_state("provisioned"); this._btn_bulk_wake.disabled = !this._all_selected_in_state("hibernated"); this._btn_bulk_deprov.disabled = selected === 0; + this._btn_bulk_oblit.disabled = selected === 0; this._select_all_cb.disabled = total === 0; this._select_all_cb.checked = selected === total && total > 0; @@ -598,6 +610,7 @@ export class Page extends ZenPage this._btn_hibernate_all.disabled = empty; this._btn_wake_all.disabled = empty; this._btn_deprov_all.disabled = empty; + this._btn_oblit_all.disabled = empty; } _on_select_all() @@ -643,6 +656,35 @@ export class Page extends ZenPage .option("Deprovision", () => this._exec_action("deprovision", ids)); } + _confirm_obliterate(ids) + { + const warn = "\uD83D\uDD25 WARNING: This action is irreversible! \uD83D\uDD25"; + const detail = "All local and backend data will be permanently destroyed.\nThis cannot be undone."; + let message; + if (ids.length === 1) + { + const id = ids[0]; + const state = this._module_state(id) || "unknown"; + message = `${warn}\n\n${detail}\n\nModule ID: ${id}\nCurrent state: ${state}`; + } + else + { + message = `${warn}\n\nObliterate ${ids.length} modules.\n\n${detail}`; + } + + new Modal() + .title("\uD83D\uDD25 Obliterate") + .message(message) + .option("Cancel", null) + .option("\uD83D\uDD25 Obliterate", () => this._exec_obliterate(ids)); + } + + async _exec_obliterate(ids) + { + await Promise.allSettled(ids.map(id => fetch(`/hub/modules/${encodeURIComponent(id)}`, { method: "DELETE" }))); + await this._update(); + } + _confirm_all(action, label) { // Capture IDs at modal-open time so action targets the displayed list @@ -667,7 +709,7 @@ export class Page extends ZenPage await fetch(`/hub/modules/${moduleId}/${action}`, { method: "POST" }); } - _show_provision_modal() + _show_module_input_modal({ title, submit_label, warning, on_submit }) { const MODULE_ID_RE = /^[A-Za-z0-9][A-Za-z0-9-]*$/; @@ -682,15 +724,23 @@ export class Page extends ZenPage const dialog = document.createElement("div"); overlay.appendChild(dialog); - const title = document.createElement("div"); - title.className = "zen_modal_title"; - title.textContent = "Provision Module"; - dialog.appendChild(title); + const title_el = document.createElement("div"); + title_el.className = "zen_modal_title"; + title_el.textContent = title; + dialog.appendChild(title_el); const content = document.createElement("div"); content.className = "zen_modal_message"; content.style.textAlign = "center"; + if (warning) + { + const warn = document.createElement("div"); + warn.style.cssText = "color:var(--theme_fail);font-weight:bold;margin-bottom:12px;"; + warn.textContent = warning; + content.appendChild(warn); + } + const input = document.createElement("input"); input.type = "text"; input.placeholder = "module-name"; @@ -711,7 +761,7 @@ export class Page extends ZenPage btn_cancel.addEventListener("click", () => overlay.remove()); const btn_submit = document.createElement("div"); - btn_submit.textContent = "Provision"; + btn_submit.textContent = submit_label; buttons.appendChild(btn_cancel); buttons.appendChild(btn_submit); @@ -720,7 +770,7 @@ export class Page extends ZenPage let submitting = false; const set_submit_enabled = (enabled) => { - btn_submit.style.opacity = enabled ? "" : "0.4"; + btn_submit.style.opacity = enabled ? "" : "0.4"; btn_submit.style.pointerEvents = enabled ? "" : "none"; }; @@ -759,16 +809,13 @@ export class Page extends ZenPage try { - const resp = await fetch(`/hub/modules/${encodeURIComponent(moduleId)}/provision`, { method: "POST" }); - if (resp.ok) + const ok = await on_submit(moduleId); + if (ok) { overlay.remove(); await this._update(); - this._navigate_to_module(moduleId); return; } - const msg = await resp.text(); - error_div.textContent = msg || ("HTTP " + resp.status); } catch (e) { @@ -786,6 +833,46 @@ export class Page extends ZenPage document.body.appendChild(overlay); input.focus(); + + return { error_div }; + } + + _show_provision_modal() + { + const { error_div } = this._show_module_input_modal({ + title: "Provision Module", + submit_label: "Provision", + on_submit: async (moduleId) => { + const resp = await fetch(`/hub/modules/${encodeURIComponent(moduleId)}/provision`, { method: "POST" }); + if (resp.ok) + { + this._navigate_to_module(moduleId); + return true; + } + const msg = await resp.text(); + error_div.textContent = msg || ("HTTP " + resp.status); + return false; + } + }); + } + + _show_obliterate_modal() + { + const { error_div } = this._show_module_input_modal({ + title: "\uD83D\uDD25 Obliterate Module", + submit_label: "\uD83D\uDD25 Obliterate", + warning: "\uD83D\uDD25 WARNING: This action is irreversible! \uD83D\uDD25\nAll local and backend data will be permanently destroyed.", + on_submit: async (moduleId) => { + const resp = await fetch(`/hub/modules/${encodeURIComponent(moduleId)}`, { method: "DELETE" }); + if (resp.ok) + { + return true; + } + const msg = await resp.text(); + error_div.textContent = msg || ("HTTP " + resp.status); + return false; + } + }); } _navigate_to_module(moduleId) diff --git a/src/zenserver/frontend/html/zen.css b/src/zenserver/frontend/html/zen.css index cb3d78cf2..ca577675b 100644 --- a/src/zenserver/frontend/html/zen.css +++ b/src/zenserver/frontend/html/zen.css @@ -1611,6 +1611,25 @@ tr:last-child td { animation: module-dot-deprovisioning-from-provisioned 1s steps(1, end) infinite; } +@keyframes module-dot-obliterating-from-provisioned { + 0%, 59.9% { background: var(--theme_fail); } + 60%, 100% { background: var(--theme_ok); } +} +@keyframes module-dot-obliterating-from-hibernated { + 0%, 59.9% { background: var(--theme_fail); } + 60%, 100% { background: var(--theme_warn); } +} + +.module-state-dot[data-state="obliterating"][data-prev-state="provisioned"] { + animation: module-dot-obliterating-from-provisioned 0.5s steps(1, end) infinite; +} +.module-state-dot[data-state="obliterating"][data-prev-state="hibernated"] { + animation: module-dot-obliterating-from-hibernated 0.5s steps(1, end) infinite; +} +.module-state-dot[data-state="obliterating"] { + animation: module-dot-obliterating-from-provisioned 0.5s steps(1, end) infinite; +} + .module-action-cell { white-space: nowrap; display: flex; diff --git a/src/zenserver/hub/README.md b/src/zenserver/hub/README.md index 322be3649..c75349fa5 100644 --- a/src/zenserver/hub/README.md +++ b/src/zenserver/hub/README.md @@ -3,23 +3,32 @@ The Zen Server can act in a "hub" mode. In this mode, the only services offered are the basic health and diagnostic services alongside an API to provision and deprovision Storage server instances. +A module ID is an alphanumeric identifier (hyphens allowed) that identifies a dataset, typically +associated with a content plug-in module. + ## Generic Server API GET `/health` - returns an `OK!` payload when all enabled services are up and responding ## Hub API -GET `{moduleid}` - alphanumeric identifier to identify a dataset (typically associated with a content plug-in module) - -GET `/hub/status` - obtain a summary of the currently live instances +GET `/hub/status` - obtain a summary of all currently live instances GET `/hub/modules/{moduleid}` - retrieve information about a module +DELETE `/hub/modules/{moduleid}` - obliterate a module (permanently destroys all data) + POST `/hub/modules/{moduleid}/provision` - provision service for module POST `/hub/modules/{moduleid}/deprovision` - deprovision service for module -GET `/hub/stats` - retrieve stats for service +POST `/hub/modules/{moduleid}/hibernate` - hibernate a provisioned module + +POST `/hub/modules/{moduleid}/wake` - wake a hibernated module + +GET `/stats/hub` - retrieve stats for the hub service + +`/hub/proxy/{port}/{path}` - reverse proxy to a child instance dashboard (all HTTP verbs) ## Hub Configuration diff --git a/src/zenserver/hub/httphubservice.cpp b/src/zenserver/hub/httphubservice.cpp index eba816793..e6a900066 100644 --- a/src/zenserver/hub/httphubservice.cpp +++ b/src/zenserver/hub/httphubservice.cpp @@ -389,45 +389,18 @@ HttpHubService::HandleModuleGet(HttpServerRequest& Request, std::string_view Mod void HttpHubService::HandleModuleDelete(HttpServerRequest& Request, std::string_view ModuleId) { - Hub::InstanceInfo InstanceInfo; - if (!m_Hub.Find(ModuleId, &InstanceInfo)) - { - Request.WriteResponse(HttpResponseCode::NotFound); - return; - } + Hub::Response Resp = m_Hub.Obliterate(std::string(ModuleId)); - if (InstanceInfo.State == HubInstanceState::Provisioned || InstanceInfo.State == HubInstanceState::Hibernated || - InstanceInfo.State == HubInstanceState::Crashed) + if (HandleFailureResults(Request, Resp)) { - try - { - Hub::Response Resp = m_Hub.Deprovision(std::string(ModuleId)); - - if (HandleFailureResults(Request, Resp)) - { - return; - } - - // TODO: nuke all related storage - - const HttpResponseCode HttpCode = - (Resp.ResponseCode == Hub::EResponseCode::Accepted) ? HttpResponseCode::Accepted : HttpResponseCode::OK; - CbObjectWriter Obj; - Obj << "moduleId" << ModuleId; - return Request.WriteResponse(HttpCode, Obj.Save()); - } - catch (const std::exception& Ex) - { - ZEN_ERROR("Exception while deprovisioning module '{}': {}", ModuleId, Ex.what()); - throw; - } + return; } - // TODO: nuke all related storage - + const HttpResponseCode HttpCode = + (Resp.ResponseCode == Hub::EResponseCode::Accepted) ? HttpResponseCode::Accepted : HttpResponseCode::OK; CbObjectWriter Obj; Obj << "moduleId" << ModuleId; - Request.WriteResponse(HttpResponseCode::OK, Obj.Save()); + Request.WriteResponse(HttpCode, Obj.Save()); } void diff --git a/src/zenserver/hub/hub.cpp b/src/zenserver/hub/hub.cpp index b2ebcd16f..ac6da9141 100644 --- a/src/zenserver/hub/hub.cpp +++ b/src/zenserver/hub/hub.cpp @@ -325,7 +325,8 @@ Hub::Provision(std::string_view ModuleId, HubProvisionedInstanceInfo& OutInfo) auto NewInstance = std::make_unique<StorageServerInstance>( m_RunEnvironment, StorageServerInstance::Configuration{.BasePort = GetInstanceIndexAssignedPort(ActiveInstanceIndex), - .HydrationTempPath = m_HydrationTempPath, + .StateDir = m_RunEnvironment.CreateChildDir(ModuleId), + .TempDir = m_HydrationTempPath / ModuleId, .HydrationTargetSpecification = m_HydrationTargetSpecification, .HydrationOptions = m_HydrationOptions, .HttpThreadCount = m_Config.InstanceHttpThreadCount, @@ -596,6 +597,7 @@ Hub::InternalDeprovision(const std::string& ModuleId, std::function<bool(ActiveI switch (CurrentState) { case HubInstanceState::Deprovisioning: + case HubInstanceState::Obliterating: return Response{EResponseCode::Accepted}; case HubInstanceState::Crashed: case HubInstanceState::Hibernated: @@ -679,6 +681,183 @@ Hub::InternalDeprovision(const std::string& ModuleId, std::function<bool(ActiveI return Response{m_WorkerPool ? EResponseCode::Accepted : EResponseCode::Completed}; } +Hub::Response +Hub::Obliterate(const std::string& ModuleId) +{ + ZEN_ASSERT(!m_ShutdownFlag.load()); + + StorageServerInstance::ExclusiveLockedPtr Instance; + size_t ActiveInstanceIndex = (size_t)-1; + { + RwLock::ExclusiveLockScope Lock(m_Lock); + + if (auto It = m_InstanceLookup.find(ModuleId); It != m_InstanceLookup.end()) + { + ActiveInstanceIndex = It->second; + ZEN_ASSERT(ActiveInstanceIndex < m_ActiveInstances.size()); + + HubInstanceState CurrentState = m_ActiveInstances[ActiveInstanceIndex].State.load(); + + switch (CurrentState) + { + case HubInstanceState::Obliterating: + return Response{EResponseCode::Accepted}; + case HubInstanceState::Provisioned: + case HubInstanceState::Hibernated: + case HubInstanceState::Crashed: + break; + case HubInstanceState::Deprovisioning: + return Response{EResponseCode::Rejected, + fmt::format("Module '{}' is being deprovisioned, retry after completion", ModuleId)}; + case HubInstanceState::Recovering: + return Response{EResponseCode::Rejected, fmt::format("Module '{}' is currently recovering from a crash", ModuleId)}; + case HubInstanceState::Unprovisioned: + return Response{EResponseCode::Completed}; + default: + return Response{EResponseCode::Rejected, + fmt::format("Module '{}' is currently in state '{}'", ModuleId, ToString(CurrentState))}; + } + + std::unique_ptr<StorageServerInstance>& RawInstance = m_ActiveInstances[ActiveInstanceIndex].Instance; + ZEN_ASSERT(RawInstance != nullptr); + + Instance = RawInstance->LockExclusive(/*Wait*/ true); + } + else + { + // Module not tracked by hub - obliterate backend data directly. + // Covers the deprovisioned case where data was preserved via dehydration. + if (m_ObliteratingInstances.contains(ModuleId)) + { + return Response{EResponseCode::Accepted}; + } + + m_ObliteratingInstances.insert(ModuleId); + Lock.ReleaseNow(); + + if (m_WorkerPool) + { + m_BackgroundWorkLatch.AddCount(1); + try + { + m_WorkerPool->ScheduleWork( + [this, ModuleId = std::string(ModuleId)]() { + auto Guard = MakeGuard([this, ModuleId]() { + m_Lock.WithExclusiveLock([this, ModuleId]() { m_ObliteratingInstances.erase(ModuleId); }); + m_BackgroundWorkLatch.CountDown(); + }); + try + { + ObliterateBackendData(ModuleId); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("Failed async obliterate of untracked module '{}': {}", ModuleId, Ex.what()); + } + }, + WorkerThreadPool::EMode::EnableBacklog); + } + catch (const std::exception& DispatchEx) + { + ZEN_ERROR("Failed to dispatch async obliterate of untracked module '{}': {}", ModuleId, DispatchEx.what()); + m_BackgroundWorkLatch.CountDown(); + { + RwLock::ExclusiveLockScope _(m_Lock); + m_ObliteratingInstances.erase(ModuleId); + } + throw; + } + + return Response{EResponseCode::Accepted}; + } + + auto _ = MakeGuard([this, &ModuleId]() { + RwLock::ExclusiveLockScope _(m_Lock); + m_ObliteratingInstances.erase(ModuleId); + }); + + ObliterateBackendData(ModuleId); + + return Response{EResponseCode::Completed}; + } + } + + HubInstanceState OldState = UpdateInstanceState(Instance, ActiveInstanceIndex, HubInstanceState::Obliterating); + const uint16_t Port = Instance.GetBasePort(); + NotifyStateUpdate(ModuleId, OldState, HubInstanceState::Obliterating, Port, {}); + + if (m_WorkerPool) + { + std::shared_ptr<StorageServerInstance::ExclusiveLockedPtr> SharedInstancePtr = + std::make_shared<StorageServerInstance::ExclusiveLockedPtr>(std::move(Instance)); + + m_BackgroundWorkLatch.AddCount(1); + try + { + m_WorkerPool->ScheduleWork( + [this, ModuleId = std::string(ModuleId), ActiveInstanceIndex, Instance = std::move(SharedInstancePtr)]() mutable { + auto _ = MakeGuard([this]() { m_BackgroundWorkLatch.CountDown(); }); + try + { + CompleteObliterate(*Instance, ActiveInstanceIndex); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("Failed async obliterate of module '{}': {}", ModuleId, Ex.what()); + } + }, + WorkerThreadPool::EMode::EnableBacklog); + } + catch (const std::exception& DispatchEx) + { + ZEN_ERROR("Failed async dispatch obliterate of module '{}': {}", ModuleId, DispatchEx.what()); + m_BackgroundWorkLatch.CountDown(); + + NotifyStateUpdate(ModuleId, HubInstanceState::Obliterating, OldState, Port, {}); + { + RwLock::ExclusiveLockScope HubLock(m_Lock); + ZEN_ASSERT_SLOW(m_InstanceLookup.find(std::string(ModuleId)) != m_InstanceLookup.end()); + ZEN_ASSERT_SLOW(m_InstanceLookup.find(std::string(ModuleId))->second == ActiveInstanceIndex); + UpdateInstanceState(HubLock, ActiveInstanceIndex, OldState); + } + + throw; + } + } + else + { + CompleteObliterate(Instance, ActiveInstanceIndex); + } + + return Response{m_WorkerPool ? EResponseCode::Accepted : EResponseCode::Completed}; +} + +void +Hub::CompleteObliterate(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex) +{ + const std::string ModuleId(Instance.GetModuleId()); + const uint16_t Port = Instance.GetBasePort(); + + try + { + Instance.Obliterate(); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("Failed to obliterate storage server instance for module '{}': {}", ModuleId, Ex.what()); + Instance = {}; + { + RwLock::ExclusiveLockScope HubLock(m_Lock); + UpdateInstanceState(HubLock, ActiveInstanceIndex, HubInstanceState::Crashed); + } + NotifyStateUpdate(ModuleId, HubInstanceState::Obliterating, HubInstanceState::Crashed, Port, {}); + throw; + } + + NotifyStateUpdate(ModuleId, HubInstanceState::Obliterating, HubInstanceState::Unprovisioned, Port, {}); + RemoveInstance(Instance, ActiveInstanceIndex, ModuleId); +} + void Hub::CompleteDeprovision(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex, HubInstanceState OldState) { @@ -687,30 +866,28 @@ Hub::CompleteDeprovision(StorageServerInstance::ExclusiveLockedPtr& Instance, si try { + if (OldState == HubInstanceState::Provisioned) { - if (OldState == HubInstanceState::Provisioned) - { - ZEN_INFO("Triggering GC for module {}", ModuleId); + ZEN_INFO("Triggering GC for module {}", ModuleId); - HttpClient GcClient(fmt::format("http://localhost:{}", Port)); + HttpClient GcClient(fmt::format("http://localhost:{}", Port)); - HttpClient::KeyValueMap Params; - Params.Entries.insert({"smallobjects", "true"}); - Params.Entries.insert({"skipcid", "false"}); - HttpClient::Response Response = GcClient.Post("/admin/gc", HttpClient::Accept(HttpContentType::kCbObject), Params); - Stopwatch Timer; - while (Response && Timer.GetElapsedTimeMs() < 5000) + HttpClient::KeyValueMap Params; + Params.Entries.insert({"smallobjects", "true"}); + Params.Entries.insert({"skipcid", "false"}); + HttpClient::Response Response = GcClient.Post("/admin/gc", HttpClient::Accept(HttpContentType::kCbObject), Params); + Stopwatch Timer; + while (Response && Timer.GetElapsedTimeMs() < 5000) + { + Response = GcClient.Get("/admin/gc", HttpClient::Accept(HttpContentType::kCbObject)); + if (Response) { - Response = GcClient.Get("/admin/gc", HttpClient::Accept(HttpContentType::kCbObject)); - if (Response) + bool Complete = Response.AsObject()["Status"].AsString() != "Running"; + if (Complete) { - bool Complete = Response.AsObject()["Status"].AsString() != "Running"; - if (Complete) - { - break; - } - Sleep(50); + break; } + Sleep(50); } } } @@ -732,20 +909,7 @@ Hub::CompleteDeprovision(StorageServerInstance::ExclusiveLockedPtr& Instance, si } NotifyStateUpdate(ModuleId, HubInstanceState::Deprovisioning, HubInstanceState::Unprovisioned, Port, {}); - Instance = {}; - - std::unique_ptr<StorageServerInstance> DeleteInstance; - { - RwLock::ExclusiveLockScope HubLock(m_Lock); - auto It = m_InstanceLookup.find(std::string(ModuleId)); - ZEN_ASSERT_SLOW(It != m_InstanceLookup.end()); - ZEN_ASSERT_SLOW(It->second == ActiveInstanceIndex); - DeleteInstance = std::move(m_ActiveInstances[ActiveInstanceIndex].Instance); - m_FreeActiveInstanceIndexes.push_back(ActiveInstanceIndex); - m_InstanceLookup.erase(It); - UpdateInstanceState(HubLock, ActiveInstanceIndex, HubInstanceState::Unprovisioned); - } - DeleteInstance.reset(); + RemoveInstance(Instance, ActiveInstanceIndex, ModuleId); } Hub::Response @@ -1018,6 +1182,50 @@ Hub::CompleteWake(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t Ac } } +void +Hub::RemoveInstance(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex, std::string_view ModuleId) +{ + Instance = {}; + + std::unique_ptr<StorageServerInstance> DeleteInstance; + { + RwLock::ExclusiveLockScope HubLock(m_Lock); + auto It = m_InstanceLookup.find(std::string(ModuleId)); + ZEN_ASSERT_SLOW(It != m_InstanceLookup.end()); + ZEN_ASSERT_SLOW(It->second == ActiveInstanceIndex); + DeleteInstance = std::move(m_ActiveInstances[ActiveInstanceIndex].Instance); + m_FreeActiveInstanceIndexes.push_back(ActiveInstanceIndex); + m_InstanceLookup.erase(It); + UpdateInstanceState(HubLock, ActiveInstanceIndex, HubInstanceState::Unprovisioned); + } + DeleteInstance.reset(); +} + +void +Hub::ObliterateBackendData(std::string_view ModuleId) +{ + std::filesystem::path ServerStateDir = m_RunEnvironment.GetChildBaseDir() / ModuleId; + std::filesystem::path TempDir = m_HydrationTempPath / ModuleId; + + std::atomic<bool> AbortFlag{false}; + std::atomic<bool> PauseFlag{false}; + + HydrationConfig Config{.ServerStateDir = ServerStateDir, + .TempDir = TempDir, + .ModuleId = std::string(ModuleId), + .TargetSpecification = m_HydrationTargetSpecification, + .Options = m_HydrationOptions}; + if (m_Config.OptionalHydrationWorkerPool) + { + Config.Threading.emplace(HydrationConfig::ThreadingOptions{.WorkerPool = m_Config.OptionalHydrationWorkerPool, + .AbortFlag = &AbortFlag, + .PauseFlag = &PauseFlag}); + } + + std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); + Hydrator->Obliterate(); +} + bool Hub::Find(std::string_view ModuleId, InstanceInfo* OutInstanceInfo) { @@ -1076,7 +1284,12 @@ Hub::GetInstanceCount() bool Hub::CanProvisionInstanceLocked(std::string_view ModuleId, std::string& OutReason) { - ZEN_UNUSED(ModuleId); + if (m_ObliteratingInstances.contains(std::string(ModuleId))) + { + OutReason = fmt::format("module '{}' is being obliterated", ModuleId); + return false; + } + if (m_FreeActiveInstanceIndexes.empty()) { OutReason = fmt::format("instance limit ({}) exceeded", m_Config.InstanceLimit); @@ -1137,11 +1350,13 @@ Hub::UpdateInstanceStateLocked(size_t ActiveInstanceIndex, HubInstanceState NewS case HubInstanceState::Unprovisioned: return To == HubInstanceState::Provisioning; case HubInstanceState::Provisioned: - return To == HubInstanceState::Hibernating || To == HubInstanceState::Deprovisioning || To == HubInstanceState::Crashed; + return To == HubInstanceState::Hibernating || To == HubInstanceState::Deprovisioning || To == HubInstanceState::Crashed || + To == HubInstanceState::Obliterating; case HubInstanceState::Hibernated: - return To == HubInstanceState::Waking || To == HubInstanceState::Deprovisioning; + return To == HubInstanceState::Waking || To == HubInstanceState::Deprovisioning || To == HubInstanceState::Obliterating; case HubInstanceState::Crashed: - return To == HubInstanceState::Provisioning || To == HubInstanceState::Deprovisioning || To == HubInstanceState::Recovering; + return To == HubInstanceState::Provisioning || To == HubInstanceState::Deprovisioning || + To == HubInstanceState::Recovering || To == HubInstanceState::Obliterating; case HubInstanceState::Provisioning: return To == HubInstanceState::Provisioned || To == HubInstanceState::Unprovisioned || To == HubInstanceState::Crashed; case HubInstanceState::Hibernating: @@ -1153,6 +1368,8 @@ Hub::UpdateInstanceStateLocked(size_t ActiveInstanceIndex, HubInstanceState NewS To == HubInstanceState::Crashed; case HubInstanceState::Recovering: return To == HubInstanceState::Provisioned || To == HubInstanceState::Unprovisioned; + case HubInstanceState::Obliterating: + return To == HubInstanceState::Unprovisioned || To == HubInstanceState::Crashed; } return false; }(m_ActiveInstances[ActiveInstanceIndex].State.load(), NewState)); @@ -1399,7 +1616,7 @@ Hub::CheckInstanceStatus(HttpClient& ActivityCheckClient, } else { - // transitional state (Provisioning, Deprovisioning, Hibernating, Waking, Recovering) - expected, skip. + // transitional state (Provisioning, Deprovisioning, Hibernating, Waking, Recovering, Obliterating) - expected, skip. // Crashed is handled above via AttemptRecoverInstance; it appears here only when the instance // lock was busy on a previous cycle and recovery is already pending. return true; @@ -2024,7 +2241,7 @@ TEST_CASE("hub.job_object") } # endif // ZEN_PLATFORM_WINDOWS -TEST_CASE("hub.hibernate_wake") +TEST_CASE("hub.hibernate_wake_obliterate") { ScopedTemporaryDirectory TempDir; Hub::Configuration Config; @@ -2087,14 +2304,63 @@ TEST_CASE("hub.hibernate_wake") CHECK(ModClient.Get("/health/")); } - // Deprovision - const Hub::Response DeprovisionResult = HubInstance->Deprovision("hib_a"); - CHECK(DeprovisionResult.ResponseCode == Hub::EResponseCode::Completed); + // Hibernate again for obliterate-from-hibernated test + { + const Hub::Response R = HubInstance->Hibernate("hib_a"); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + } + REQUIRE(HubInstance->Find("hib_a", &Info)); + CHECK_EQ(Info.State, HubInstanceState::Hibernated); + + // Obliterate from hibernated + { + const Hub::Response R = HubInstance->Obliterate("hib_a"); + CHECK(R.ResponseCode == Hub::EResponseCode::Completed); + } + CHECK_EQ(HubInstance->GetInstanceCount(), 0); + CHECK_FALSE(HubInstance->Find("hib_a")); + + // Re-provision for obliterate-from-provisioned test + { + const Hub::Response R = HubInstance->Provision("hib_a", ProvInfo); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + } + REQUIRE(HubInstance->Find("hib_a", &Info)); + CHECK_EQ(Info.State, HubInstanceState::Provisioned); + { + HttpClient ModClient(fmt::format("http://localhost:{}", ProvInfo.Port), kFastTimeout); + CHECK(ModClient.Get("/health/")); + } + + // Obliterate from provisioned + { + const Hub::Response R = HubInstance->Obliterate("hib_a"); + CHECK(R.ResponseCode == Hub::EResponseCode::Completed); + } + CHECK_EQ(HubInstance->GetInstanceCount(), 0); CHECK_FALSE(HubInstance->Find("hib_a")); { HttpClient ModClient(fmt::format("http://localhost:{}", ProvInfo.Port), kFastTimeout); CHECK(!ModClient.Get("/health/")); } + + // Obliterate deprovisioned module (not tracked by hub, backend data may exist) + { + const Hub::Response R = HubInstance->Provision("hib_a", ProvInfo); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + } + { + const Hub::Response R = HubInstance->Deprovision("hib_a"); + REQUIRE_MESSAGE(R.ResponseCode == Hub::EResponseCode::Completed, R.Message); + } + CHECK_FALSE(HubInstance->Find("hib_a")); + { + const Hub::Response R = HubInstance->Obliterate("hib_a"); + CHECK(R.ResponseCode == Hub::EResponseCode::Completed); + } + + // Obliterate of a never-provisioned module also succeeds (no-op backend cleanup) + CHECK(HubInstance->Obliterate("never_existed").ResponseCode == Hub::EResponseCode::Completed); } TEST_CASE("hub.async_hibernate_wake") diff --git a/src/zenserver/hub/hub.h b/src/zenserver/hub/hub.h index 071b14f35..97a238c01 100644 --- a/src/zenserver/hub/hub.h +++ b/src/zenserver/hub/hub.h @@ -18,6 +18,7 @@ #include <memory> #include <thread> #include <unordered_map> +#include <unordered_set> namespace zen { @@ -133,6 +134,14 @@ public: Response Deprovision(const std::string& ModuleId); /** + * Obliterate a storage server instance and all associated data. + * Shuts down the process, deletes backend hydration data, and cleans local state. + * + * @param ModuleId The ID of the module to obliterate. + */ + Response Obliterate(const std::string& ModuleId); + + /** * Hibernate a storage server instance for the given module ID. * The instance is shut down but its data is preserved; it can be woken later. * @@ -257,12 +266,13 @@ private: } HubInstanceState UpdateInstanceStateLocked(size_t ActiveInstanceIndex, HubInstanceState NewState); - std::vector<ActiveInstance> m_ActiveInstances; - std::deque<size_t> m_FreeActiveInstanceIndexes; - SystemMetrics m_SystemMetrics; - DiskSpace m_DiskSpace; - std::atomic<int> m_MaxInstanceCount = 0; - std::thread m_WatchDog; + std::vector<ActiveInstance> m_ActiveInstances; + std::deque<size_t> m_FreeActiveInstanceIndexes; + SystemMetrics m_SystemMetrics; + DiskSpace m_DiskSpace; + std::atomic<int> m_MaxInstanceCount = 0; + std::thread m_WatchDog; + std::unordered_set<std::string> m_ObliteratingInstances; Event m_WatchDogEvent; void WatchDog(); @@ -281,8 +291,11 @@ private: HubInstanceState OldState, bool IsNewInstance); void CompleteDeprovision(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex, HubInstanceState OldState); + void CompleteObliterate(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex); void CompleteHibernate(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex, HubInstanceState OldState); void CompleteWake(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex, HubInstanceState OldState); + void RemoveInstance(StorageServerInstance::ExclusiveLockedPtr& Instance, size_t ActiveInstanceIndex, std::string_view ModuleId); + void ObliterateBackendData(std::string_view ModuleId); // Notifications may fire slightly out of sync with the Hub's internal State flag. // The guarantee is that notifications are sent in the correct order, but the State diff --git a/src/zenserver/hub/hubinstancestate.cpp b/src/zenserver/hub/hubinstancestate.cpp index c47fdd294..310305e5d 100644 --- a/src/zenserver/hub/hubinstancestate.cpp +++ b/src/zenserver/hub/hubinstancestate.cpp @@ -29,6 +29,8 @@ ToString(HubInstanceState State) return "crashed"; case HubInstanceState::Recovering: return "recovering"; + case HubInstanceState::Obliterating: + return "obliterating"; } ZEN_ASSERT(false); return "unknown"; diff --git a/src/zenserver/hub/hubinstancestate.h b/src/zenserver/hub/hubinstancestate.h index c895f75d1..c7188aa5c 100644 --- a/src/zenserver/hub/hubinstancestate.h +++ b/src/zenserver/hub/hubinstancestate.h @@ -20,7 +20,8 @@ enum class HubInstanceState : uint32_t Hibernating, // Provisioned -> Hibernated (Shutting down process, preserving data on disk) Waking, // Hibernated -> Provisioned (Starting process from preserved data) Deprovisioning, // Provisioned/Hibernated/Crashed -> Unprovisioned (Shutting down process and cleaning up data) - Recovering, // Crashed -> Provisioned/Deprovisioned (Attempting in-place restart after a crash) + Recovering, // Crashed -> Provisioned/Unprovisioned (Attempting in-place restart after a crash) + Obliterating, // Provisioned/Hibernated/Crashed -> Unprovisioned (Destroying all local and backend data) }; std::string_view ToString(HubInstanceState State); diff --git a/src/zenserver/hub/hydration.cpp b/src/zenserver/hub/hydration.cpp index cf36d8646..673306cde 100644 --- a/src/zenserver/hub/hydration.cpp +++ b/src/zenserver/hub/hydration.cpp @@ -87,22 +87,23 @@ namespace hydration_impl { virtual void Configure(std::string_view ModuleId, const std::filesystem::path& TempDir, std::string_view TargetSpecification, - const CbObject& Options) = 0; - virtual void SaveMetadata(const CbObject& Data) = 0; - virtual CbObject LoadMetadata() = 0; - virtual CbObject GetSettings() = 0; - virtual void ParseSettings(const CbObjectView& Settings) = 0; - virtual std::vector<IoHash> List() = 0; + const CbObject& Options) = 0; + virtual void SaveMetadata(const CbObject& Data) = 0; + virtual CbObject LoadMetadata() = 0; + virtual CbObject GetSettings() = 0; + virtual void ParseSettings(const CbObjectView& Settings) = 0; + virtual std::vector<IoHash> List() = 0; virtual void Put(ParallelWork& Work, WorkerThreadPool& WorkerPool, const IoHash& Hash, uint64_t Size, - const std::filesystem::path& SourcePath) = 0; + const std::filesystem::path& SourcePath) = 0; virtual void Get(ParallelWork& Work, WorkerThreadPool& WorkerPool, const IoHash& Hash, uint64_t Size, - const std::filesystem::path& DestinationPath) = 0; + const std::filesystem::path& DestinationPath) = 0; + virtual void Delete(ParallelWork& Work, WorkerThreadPool& WorkerPool) = 0; }; constexpr std::string_view FileHydratorPrefix = "file://"; @@ -226,6 +227,13 @@ namespace hydration_impl { }); } + virtual void Delete(ParallelWork& Work, WorkerThreadPool& WorkerPool) override + { + ZEN_UNUSED(Work); + ZEN_UNUSED(WorkerPool); + DeleteDirectories(m_StoragePath); + } + private: std::filesystem::path m_StoragePath; std::filesystem::path m_StatePathName; @@ -513,6 +521,30 @@ namespace hydration_impl { } } + virtual void Delete(ParallelWork& Work, WorkerThreadPool& WorkerPool) override + { + std::string Prefix = m_KeyPrefix + "/"; + S3ListObjectsResult ListResult = m_Client->ListObjects(Prefix); + if (!ListResult.IsSuccess()) + { + throw zen::runtime_error("Failed to list S3 objects for deletion under '{}': {}", Prefix, ListResult.Error); + } + for (const S3ObjectInfo& Obj : ListResult.Objects) + { + Work.ScheduleWork(WorkerPool, [this, Key = Obj.Key](std::atomic<bool>& AbortFlag) { + if (AbortFlag.load()) + { + return; + } + S3Result DelResult = m_Client->DeleteObject(Key); + if (!DelResult.IsSuccess()) + { + throw zen::runtime_error("Failed to delete S3 object '{}': {}", Key, DelResult.Error); + } + }); + } + } + private: std::unique_ptr<S3Client> CreateS3Client() const { @@ -569,6 +601,7 @@ public: virtual void Configure(const HydrationConfig& Config) override; virtual void Dehydrate(const CbObject& CachedState) override; virtual CbObject Hydrate() override; + virtual void Obliterate() override; private: struct Entry @@ -986,6 +1019,27 @@ IncrementalHydrator::Hydrate() } } +void +IncrementalHydrator::Obliterate() +{ + const std::filesystem::path ServerStateDir = MakeSafeAbsolutePath(m_Config.ServerStateDir); + const std::filesystem::path TempDir = MakeSafeAbsolutePath(m_Config.TempDir); + + try + { + ParallelWork Work(*m_Threading.AbortFlag, *m_Threading.PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + m_Storage->Delete(Work, *m_Threading.WorkerPool); + Work.Wait(); + } + catch (const std::exception& Ex) + { + ZEN_WARN("Failed to delete backend storage for module '{}': {}. Proceeding with local cleanup.", m_Config.ModuleId, Ex.what()); + } + + CleanDirectory(*m_Threading.WorkerPool, *m_Threading.AbortFlag, *m_Threading.PauseFlag, ServerStateDir); + CleanDirectory(*m_Threading.WorkerPool, *m_Threading.AbortFlag, *m_Threading.PauseFlag, TempDir); +} + std::unique_ptr<HydrationStrategyBase> CreateHydrator(const HydrationConfig& Config) { @@ -1299,6 +1353,55 @@ TEST_CASE("hydration.file.excluded_files_not_dehydrated") } // --------------------------------------------------------------------------- +// FileHydrator obliterate test +// --------------------------------------------------------------------------- + +TEST_CASE("hydration.file.obliterate") +{ + ScopedTemporaryDirectory TempDir; + + std::filesystem::path ServerStateDir = TempDir.Path() / "server_state"; + std::filesystem::path HydrationStore = TempDir.Path() / "hydration_store"; + std::filesystem::path HydrationTemp = TempDir.Path() / "hydration_temp"; + CreateDirectories(ServerStateDir); + CreateDirectories(HydrationStore); + CreateDirectories(HydrationTemp); + + const std::string ModuleId = "obliterate_test"; + CreateSmallTestTree(ServerStateDir); + + HydrationConfig Config; + Config.ServerStateDir = ServerStateDir; + Config.TempDir = HydrationTemp; + Config.ModuleId = ModuleId; + Config.TargetSpecification = "file://" + HydrationStore.string(); + + // Dehydrate so the backend store has data + { + std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); + Hydrator->Dehydrate(CbObject()); + } + CHECK(std::filesystem::exists(HydrationStore / ModuleId)); + + // Put some files back in ServerStateDir and TempDir to verify cleanup + CreateSmallTestTree(ServerStateDir); + WriteFile(HydrationTemp / "leftover.tmp", CreateSemiRandomBlob(64)); + + // Obliterate + { + std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); + Hydrator->Obliterate(); + } + + // Backend store directory deleted + CHECK_FALSE(std::filesystem::exists(HydrationStore / ModuleId)); + // ServerStateDir cleaned + CHECK(std::filesystem::is_empty(ServerStateDir)); + // TempDir cleaned + CHECK(std::filesystem::is_empty(HydrationTemp)); +} + +// --------------------------------------------------------------------------- // FileHydrator concurrent test // --------------------------------------------------------------------------- @@ -1551,6 +1654,78 @@ TEST_CASE("hydration.s3.concurrent") } } +TEST_CASE("hydration.s3.obliterate") +{ + MinioProcessOptions MinioOpts; + MinioOpts.Port = 19019; + MinioProcess Minio(MinioOpts); + Minio.SpawnMinioServer(); + Minio.CreateBucket("zen-hydration-test"); + + ScopedEnvVar EnvAccessKey("AWS_ACCESS_KEY_ID", Minio.RootUser()); + ScopedEnvVar EnvSecretKey("AWS_SECRET_ACCESS_KEY", Minio.RootPassword()); + + ScopedTemporaryDirectory TempDir; + + std::filesystem::path ServerStateDir = TempDir.Path() / "server_state"; + std::filesystem::path HydrationTemp = TempDir.Path() / "hydration_temp"; + CreateDirectories(ServerStateDir); + CreateDirectories(HydrationTemp); + + const std::string ModuleId = "s3test_obliterate"; + + HydrationConfig Config; + Config.ServerStateDir = ServerStateDir; + Config.TempDir = HydrationTemp; + Config.ModuleId = ModuleId; + { + std::string ConfigJson = + fmt::format(R"({{"type":"s3","settings":{{"uri":"s3://zen-hydration-test","endpoint":"{}","path-style":true}}}})", + Minio.Endpoint()); + std::string ParseError; + CbFieldIterator Root = LoadCompactBinaryFromJson(ConfigJson, ParseError); + ZEN_ASSERT(ParseError.empty() && Root.IsObject()); + Config.Options = std::move(Root).AsObject(); + } + + // Dehydrate to populate backend + CreateSmallTestTree(ServerStateDir); + { + std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); + Hydrator->Dehydrate(CbObject()); + } + + auto ListModuleObjects = [&]() { + S3ClientOptions Opts; + Opts.BucketName = "zen-hydration-test"; + Opts.Endpoint = Minio.Endpoint(); + Opts.PathStyle = true; + Opts.Credentials.AccessKeyId = Minio.RootUser(); + Opts.Credentials.SecretAccessKey = Minio.RootPassword(); + S3Client Client(Opts); + return Client.ListObjects(ModuleId + "/"); + }; + + // Verify objects exist in S3 + CHECK(!ListModuleObjects().Objects.empty()); + + // Re-populate ServerStateDir and TempDir for cleanup verification + CreateSmallTestTree(ServerStateDir); + WriteFile(HydrationTemp / "leftover.tmp", CreateSemiRandomBlob(64)); + + // Obliterate + { + std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); + Hydrator->Obliterate(); + } + + // Verify S3 objects deleted + CHECK(ListModuleObjects().Objects.empty()); + // Local directories cleaned + CHECK(std::filesystem::is_empty(ServerStateDir)); + CHECK(std::filesystem::is_empty(HydrationTemp)); +} + TEST_CASE("hydration.s3.config_overrides") { MinioProcessOptions MinioOpts; diff --git a/src/zenserver/hub/hydration.h b/src/zenserver/hub/hydration.h index 7edf5d996..fc2f309b2 100644 --- a/src/zenserver/hub/hydration.h +++ b/src/zenserver/hub/hydration.h @@ -57,6 +57,9 @@ struct HydrationStrategyBase // Download state from the configured target into ServerStateDir. Returns cached state for the next Dehydrate. // On failure, ServerStateDir is wiped and an empty CbObject is returned. virtual CbObject Hydrate() = 0; + + // Delete all stored data for this module from the configured backend, then clean ServerStateDir and TempDir. + virtual void Obliterate() = 0; }; // Create a configured hydrator based on Config. Ready to call Hydrate/Dehydrate immediately. diff --git a/src/zenserver/hub/storageserverinstance.cpp b/src/zenserver/hub/storageserverinstance.cpp index 6185a7f19..b31a64e56 100644 --- a/src/zenserver/hub/storageserverinstance.cpp +++ b/src/zenserver/hub/storageserverinstance.cpp @@ -16,8 +16,6 @@ StorageServerInstance::StorageServerInstance(ZenServerEnvironment& RunEnvironmen , m_ModuleId(ModuleId) , m_ServerInstance(RunEnvironment, ZenServerInstance::ServerMode::kStorageServer) { - m_BaseDir = RunEnvironment.CreateChildDir(ModuleId); - m_TempDir = Config.HydrationTempPath / ModuleId; } StorageServerInstance::~StorageServerInstance() @@ -31,7 +29,7 @@ StorageServerInstance::SpawnServerProcess() m_ServerInstance.ResetDeadProcess(); m_ServerInstance.SetServerExecutablePath(GetRunningExecutablePath()); - m_ServerInstance.SetDataDir(m_BaseDir); + m_ServerInstance.SetDataDir(m_Config.StateDir); #if ZEN_PLATFORM_WINDOWS m_ServerInstance.SetJobObject(m_JobObject); #endif @@ -77,7 +75,7 @@ StorageServerInstance::ProvisionLocked() return; } - ZEN_INFO("Provisioning storage server instance for module '{}', at '{}'", m_ModuleId, m_BaseDir); + ZEN_INFO("Provisioning storage server instance for module '{}', at '{}'", m_ModuleId, m_Config.StateDir); try { Hydrate(); @@ -87,7 +85,7 @@ StorageServerInstance::ProvisionLocked() { ZEN_WARN("Failed spawning server instance for module '{}', at '{}' during provisioning. Reason: {}", m_ModuleId, - m_BaseDir, + m_Config.StateDir, Ex.what()); throw; } @@ -117,6 +115,22 @@ StorageServerInstance::DeprovisionLocked() } void +StorageServerInstance::ObliterateLocked() +{ + if (m_ServerInstance.IsRunning()) + { + // m_ServerInstance.Shutdown() never throws. + m_ServerInstance.Shutdown(); + } + + std::atomic<bool> AbortFlag{false}; + std::atomic<bool> PauseFlag{false}; + HydrationConfig Config = MakeHydrationConfig(AbortFlag, PauseFlag); + std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); + Hydrator->Obliterate(); +} + +void StorageServerInstance::HibernateLocked() { // Signal server to shut down, but keep data around for later wake @@ -146,7 +160,10 @@ StorageServerInstance::WakeLocked() } catch (const std::exception& Ex) { - ZEN_WARN("Failed spawning server instance for module '{}', at '{}' during waking. Reason: {}", m_ModuleId, m_BaseDir, Ex.what()); + ZEN_WARN("Failed spawning server instance for module '{}', at '{}' during waking. Reason: {}", + m_ModuleId, + m_Config.StateDir, + Ex.what()); throw; } } @@ -154,20 +171,9 @@ StorageServerInstance::WakeLocked() void StorageServerInstance::Hydrate() { - std::atomic<bool> AbortFlag{false}; - std::atomic<bool> PauseFlag{false}; - - HydrationConfig Config{.ServerStateDir = m_BaseDir, - .TempDir = m_TempDir, - .ModuleId = m_ModuleId, - .TargetSpecification = m_Config.HydrationTargetSpecification, - .Options = m_Config.HydrationOptions}; - if (m_Config.OptionalWorkerPool) - { - Config.Threading.emplace( - HydrationConfig::ThreadingOptions{.WorkerPool = m_Config.OptionalWorkerPool, .AbortFlag = &AbortFlag, .PauseFlag = &PauseFlag}); - } - + std::atomic<bool> AbortFlag{false}; + std::atomic<bool> PauseFlag{false}; + HydrationConfig Config = MakeHydrationConfig(AbortFlag, PauseFlag); std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); m_HydrationState = Hydrator->Hydrate(); } @@ -175,11 +181,18 @@ StorageServerInstance::Hydrate() void StorageServerInstance::Dehydrate() { - std::atomic<bool> AbortFlag{false}; - std::atomic<bool> PauseFlag{false}; + std::atomic<bool> AbortFlag{false}; + std::atomic<bool> PauseFlag{false}; + HydrationConfig Config = MakeHydrationConfig(AbortFlag, PauseFlag); + std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); + Hydrator->Dehydrate(m_HydrationState); +} - HydrationConfig Config{.ServerStateDir = m_BaseDir, - .TempDir = m_TempDir, +HydrationConfig +StorageServerInstance::MakeHydrationConfig(std::atomic<bool>& AbortFlag, std::atomic<bool>& PauseFlag) +{ + HydrationConfig Config{.ServerStateDir = m_Config.StateDir, + .TempDir = m_Config.TempDir, .ModuleId = m_ModuleId, .TargetSpecification = m_Config.HydrationTargetSpecification, .Options = m_Config.HydrationOptions}; @@ -189,8 +202,7 @@ StorageServerInstance::Dehydrate() HydrationConfig::ThreadingOptions{.WorkerPool = m_Config.OptionalWorkerPool, .AbortFlag = &AbortFlag, .PauseFlag = &PauseFlag}); } - std::unique_ptr<HydrationStrategyBase> Hydrator = CreateHydrator(Config); - Hydrator->Dehydrate(m_HydrationState); + return Config; } StorageServerInstance::SharedLockedPtr::SharedLockedPtr() : m_Lock(nullptr), m_Instance(nullptr) @@ -359,6 +371,13 @@ StorageServerInstance::ExclusiveLockedPtr::Deprovision() } void +StorageServerInstance::ExclusiveLockedPtr::Obliterate() +{ + ZEN_ASSERT(m_Instance != nullptr); + m_Instance->ObliterateLocked(); +} + +void StorageServerInstance::ExclusiveLockedPtr::Hibernate() { ZEN_ASSERT(m_Instance != nullptr); diff --git a/src/zenserver/hub/storageserverinstance.h b/src/zenserver/hub/storageserverinstance.h index d8bebc48d..e2c5eb562 100644 --- a/src/zenserver/hub/storageserverinstance.h +++ b/src/zenserver/hub/storageserverinstance.h @@ -2,6 +2,8 @@ #pragma once +#include "hydration.h" + #include <zencore/compactbinary.h> #include <zenutil/zenserverprocess.h> @@ -25,7 +27,8 @@ public: struct Configuration { uint16_t BasePort; - std::filesystem::path HydrationTempPath; + std::filesystem::path StateDir; + std::filesystem::path TempDir; std::string HydrationTargetSpecification; CbObject HydrationOptions; uint32_t HttpThreadCount = 0; // Automatic @@ -113,6 +116,7 @@ public: void Provision(); void Deprovision(); + void Obliterate(); void Hibernate(); void Wake(); @@ -126,6 +130,7 @@ public: private: void ProvisionLocked(); void DeprovisionLocked(); + void ObliterateLocked(); void HibernateLocked(); void WakeLocked(); @@ -135,10 +140,6 @@ private: std::string m_ModuleId; ZenServerInstance m_ServerInstance; - std::filesystem::path m_BaseDir; - - std::filesystem::path m_TempDir; - CbObject m_HydrationState; #if ZEN_PLATFORM_WINDOWS @@ -147,8 +148,9 @@ private: void SpawnServerProcess(); - void Hydrate(); - void Dehydrate(); + void Hydrate(); + void Dehydrate(); + HydrationConfig MakeHydrationConfig(std::atomic<bool>& AbortFlag, std::atomic<bool>& PauseFlag); friend class SharedLockedPtr; friend class ExclusiveLockedPtr; |