aboutsummaryrefslogtreecommitdiff
path: root/src/zenserver/hub
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2026-03-11 09:45:31 +0100
committerGitHub Enterprise <[email protected]>2026-03-11 09:45:31 +0100
commite9d04250225a430cffed28e6a49299e3da542f97 (patch)
tree7f99594bd930c072e159458319e822bbb18794f7 /src/zenserver/hub
parentminor zenstore/blockstore fixes (#821) (diff)
downloadzen-e9d04250225a430cffed28e6a49299e3da542f97.tar.xz
zen-e9d04250225a430cffed28e6a49299e3da542f97.zip
hub consul integration (#820)
- Feature: Basic consul integration for zenserver hub mode, restricted to host local consul agent and register/deregister of services - Feature: Added new options to zenserver hub mode - `consul-endpoint` - Consul endpoint URL for service registration (empty = disabled) - `hub-base-port-number` - Base port number for provisioned instances - `hub-instance-limit` - Maximum number of provisioned instances for this hub - `hub-use-job-object` - Enable the use of a Windows Job Object for child process management (Windows only)
Diffstat (limited to 'src/zenserver/hub')
-rw-r--r--src/zenserver/hub/httphubservice.cpp (renamed from src/zenserver/hub/hubservice.cpp)6
-rw-r--r--src/zenserver/hub/httphubservice.h (renamed from src/zenserver/hub/hubservice.h)0
-rw-r--r--src/zenserver/hub/hub.cpp507
-rw-r--r--src/zenserver/hub/hub.h50
-rw-r--r--src/zenserver/hub/zenhubserver.cpp155
-rw-r--r--src/zenserver/hub/zenhubserver.h16
6 files changed, 698 insertions, 36 deletions
diff --git a/src/zenserver/hub/hubservice.cpp b/src/zenserver/hub/httphubservice.cpp
index 6765340dc..67ed0cfd8 100644
--- a/src/zenserver/hub/hubservice.cpp
+++ b/src/zenserver/hub/httphubservice.cpp
@@ -1,6 +1,6 @@
// Copyright Epic Games, Inc. All Rights Reserved.
-#include "hubservice.h"
+#include "httphubservice.h"
#include "hub.h"
#include "storageserverinstance.h"
@@ -74,7 +74,7 @@ HttpHubService::HttpHubService(Hub& Hub) : m_Hub(Hub)
try
{
- Hub::ProvisionedInstanceInfo Info;
+ HubProvisionedInstanceInfo Info;
if (m_Hub.Provision(ModuleId, /* out */ Info, /* out */ FailureReason))
{
CbObjectWriter Obj;
@@ -144,7 +144,7 @@ HttpHubService::HttpHubService(Hub& Hub) : m_Hub(Hub)
CbObjectWriter Obj;
Obj << "currentInstanceCount" << m_Hub.GetInstanceCount();
Obj << "maxInstanceCount" << m_Hub.GetMaxInstanceCount();
- Obj << "instanceLimit" << m_Hub.GetInstanceLimit();
+ Obj << "instanceLimit" << m_Hub.GetConfig().InstanceLimit;
Req.ServerRequest().WriteResponse(HttpResponseCode::OK, Obj.Save());
},
HttpVerb::kGet);
diff --git a/src/zenserver/hub/hubservice.h b/src/zenserver/hub/httphubservice.h
index d08eeea2a..d08eeea2a 100644
--- a/src/zenserver/hub/hubservice.h
+++ b/src/zenserver/hub/httphubservice.h
diff --git a/src/zenserver/hub/hub.cpp b/src/zenserver/hub/hub.cpp
index f1cc86de6..c9720b32d 100644
--- a/src/zenserver/hub/hub.cpp
+++ b/src/zenserver/hub/hub.cpp
@@ -15,6 +15,13 @@ ZEN_THIRD_PARTY_INCLUDES_START
#include <gsl/gsl-lite.hpp>
ZEN_THIRD_PARTY_INCLUDES_END
+#if ZEN_WITH_TESTS
+# include <zencore/filesystem.h>
+# include <zencore/testing.h>
+# include <zencore/testutils.h>
+# include <zencore/workthreadpool.h>
+#endif
+
namespace zen {
///////////////////////////////////////////////////////////////////////////
@@ -111,14 +118,19 @@ private:
//////////////////////////////////////////////////////////////////////////
-Hub::Hub(const Configuration& Config) : m_UseJobObject(Config.UseJobObject)
+Hub::Hub(const Configuration& Config,
+ ZenServerEnvironment&& RunEnvironment,
+ ProvisionModuleCallbackFunc&& ProvisionedModuleCallback,
+ ProvisionModuleCallbackFunc&& DeprovisionedModuleCallback)
+: m_Config(Config)
+, m_RunEnvironment(std::move(RunEnvironment))
+, m_ProvisionedModuleCallback(std::move(ProvisionedModuleCallback))
+, m_DeprovisionedModuleCallback(std::move(DeprovisionedModuleCallback))
{
m_HostMetrics = GetSystemMetrics();
m_ResourceLimits.DiskUsageBytes = 1000ull * 1024 * 1024 * 1024;
m_ResourceLimits.MemoryUsageBytes = 16ull * 1024 * 1024 * 1024;
- m_RunEnvironment.InitializeForHub(Config.HubBaseDir, Config.ChildBaseDir);
-
m_FileHydrationPath = m_RunEnvironment.CreateChildDir("hydration_storage");
ZEN_INFO("using file hydration path: '{}'", m_FileHydrationPath);
@@ -128,13 +140,10 @@ Hub::Hub(const Configuration& Config) : m_UseJobObject(Config.UseJobObject)
// This is necessary to ensure the hub assigns a distinct port range.
// We need to do this primarily because otherwise automated tests will
// fail as the test runner will create processes in the default range.
- // We should probably make this configurable or dynamic for maximum
- // flexibility, and to allow running multiple hubs on the same host if
- // necessary.
- m_RunEnvironment.SetNextPortNumber(21000);
+ m_RunEnvironment.SetNextPortNumber(m_Config.BasePortNumber);
#if ZEN_PLATFORM_WINDOWS
- if (m_UseJobObject)
+ if (m_Config.UseJobObject)
{
m_JobObject.Initialize();
if (m_JobObject.IsValid())
@@ -158,6 +167,20 @@ Hub::~Hub()
m_Lock.WithExclusiveLock([this] {
for (auto& [ModuleId, Instance] : m_Instances)
{
+ uint16_t BasePort = Instance->GetBasePort();
+ std::string BaseUri; // TODO?
+
+ if (m_DeprovisionedModuleCallback)
+ {
+ try
+ {
+ m_DeprovisionedModuleCallback(ModuleId, HubProvisionedInstanceInfo{.BaseUri = BaseUri, .Port = BasePort});
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_ERROR("Deprovision callback for module {} failed. Reason: '{}'", ModuleId, Ex.what());
+ }
+ }
Instance->Deprovision();
}
m_Instances.clear();
@@ -170,7 +193,7 @@ Hub::~Hub()
}
bool
-Hub::Provision(std::string_view ModuleId, ProvisionedInstanceInfo& OutInfo, std::string& OutReason)
+Hub::Provision(std::string_view ModuleId, HubProvisionedInstanceInfo& OutInfo, std::string& OutReason)
{
StorageServerInstance* Instance = nullptr;
bool IsNewInstance = false;
@@ -241,9 +264,20 @@ Hub::Provision(std::string_view ModuleId, ProvisionedInstanceInfo& OutInfo, std:
}
OutInfo.Port = Instance->GetBasePort();
-
// TODO: base URI? Would need to know what host name / IP to use
+ if (m_ProvisionedModuleCallback)
+ {
+ try
+ {
+ m_ProvisionedModuleCallback(ModuleId, OutInfo);
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_ERROR("Provision callback for module {} failed. Reason: '{}'", ModuleId, Ex.what());
+ }
+ }
+
return true;
}
@@ -279,6 +313,21 @@ Hub::Deprovision(const std::string& ModuleId, std::string& OutReason)
}
}
+ uint16_t BasePort = Instance->GetBasePort();
+ std::string BaseUri; // TODO?
+
+ if (m_DeprovisionedModuleCallback)
+ {
+ try
+ {
+ m_DeprovisionedModuleCallback(ModuleId, HubProvisionedInstanceInfo{.BaseUri = BaseUri, .Port = BasePort});
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_ERROR("Deprovision callback for module {} failed. Reason: '{}'", ModuleId, Ex.what());
+ }
+ }
+
// The module is deprovisioned outside the lock to avoid blocking other operations.
//
// To ensure that no new provisioning can occur while we're deprovisioning,
@@ -362,9 +411,9 @@ Hub::CanProvisionInstance(std::string_view ModuleId, std::string& OutReason)
return false;
}
- if (gsl::narrow_cast<int>(m_Instances.size()) >= m_InstanceLimit)
+ if (gsl::narrow_cast<int>(m_Instances.size()) >= m_Config.InstanceLimit)
{
- OutReason = fmt::format("instance limit exceeded ({})", m_InstanceLimit);
+ OutReason = fmt::format("instance limit exceeded ({})", m_Config.InstanceLimit);
return false;
}
@@ -374,6 +423,438 @@ Hub::CanProvisionInstance(std::string_view ModuleId, std::string& OutReason)
return true;
}
-///////////////////////////////////////////////////////////////////////////
+#if ZEN_WITH_TESTS
+
+TEST_SUITE_BEGIN("server.hub");
+
+namespace hub_testutils {
+
+ ZenServerEnvironment MakeHubEnvironment(const std::filesystem::path& BaseDir)
+ {
+ return ZenServerEnvironment(ZenServerEnvironment::Hub, GetRunningExecutablePath().parent_path(), BaseDir);
+ }
+
+ std::unique_ptr<Hub> MakeHub(const std::filesystem::path& BaseDir,
+ Hub::Configuration Config = {},
+ Hub::ProvisionModuleCallbackFunc ProvisionCallback = {},
+ Hub::ProvisionModuleCallbackFunc DeprovisionCallback = {})
+ {
+ return std::make_unique<Hub>(Config, MakeHubEnvironment(BaseDir), std::move(ProvisionCallback), std::move(DeprovisionCallback));
+ }
+
+} // namespace hub_testutils
+
+TEST_CASE("hub.provision_basic")
+{
+ ScopedTemporaryDirectory TempDir;
+ std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path());
+
+ CHECK_EQ(HubInstance->GetInstanceCount(), 0);
+ CHECK_FALSE(HubInstance->Find("module_a"));
+
+ HubProvisionedInstanceInfo Info;
+ std::string Reason;
+ const bool ProvisionResult = HubInstance->Provision("module_a", Info, Reason);
+ REQUIRE_MESSAGE(ProvisionResult, Reason);
+ CHECK_NE(Info.Port, 0);
+ CHECK_EQ(HubInstance->GetInstanceCount(), 1);
+ CHECK(HubInstance->Find("module_a"));
+
+ const bool DeprovisionResult = HubInstance->Deprovision("module_a", Reason);
+ CHECK(DeprovisionResult);
+ CHECK_EQ(HubInstance->GetInstanceCount(), 0);
+ CHECK_FALSE(HubInstance->Find("module_a"));
+}
+
+TEST_CASE("hub.provision_callbacks")
+{
+ ScopedTemporaryDirectory TempDir;
+
+ struct CallbackRecord
+ {
+ std::string ModuleId;
+ uint16_t Port;
+ };
+ RwLock CallbackMutex;
+ std::vector<CallbackRecord> ProvisionRecords;
+ std::vector<CallbackRecord> DeprovisionRecords;
+
+ auto ProvisionCb = [&](std::string_view ModuleId, const HubProvisionedInstanceInfo& Info) {
+ CallbackMutex.WithExclusiveLock([&]() { ProvisionRecords.push_back({std::string(ModuleId), Info.Port}); });
+ };
+ auto DeprovisionCb = [&](std::string_view ModuleId, const HubProvisionedInstanceInfo& Info) {
+ CallbackMutex.WithExclusiveLock([&]() { DeprovisionRecords.push_back({std::string(ModuleId), Info.Port}); });
+ };
+
+ std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), {}, std::move(ProvisionCb), std::move(DeprovisionCb));
+
+ HubProvisionedInstanceInfo Info;
+ std::string Reason;
+
+ const bool ProvisionResult = HubInstance->Provision("cb_module", Info, Reason);
+ REQUIRE_MESSAGE(ProvisionResult, Reason);
+
+ {
+ RwLock::SharedLockScope _(CallbackMutex);
+ REQUIRE_EQ(ProvisionRecords.size(), 1u);
+ CHECK_EQ(ProvisionRecords[0].ModuleId, "cb_module");
+ CHECK_EQ(ProvisionRecords[0].Port, Info.Port);
+ CHECK_NE(ProvisionRecords[0].Port, 0);
+ }
+
+ const bool DeprovisionResult = HubInstance->Deprovision("cb_module", Reason);
+ CHECK(DeprovisionResult);
+
+ {
+ RwLock::SharedLockScope _(CallbackMutex);
+ REQUIRE_EQ(DeprovisionRecords.size(), 1u);
+ CHECK_EQ(DeprovisionRecords[0].ModuleId, "cb_module");
+ CHECK_NE(DeprovisionRecords[0].Port, 0);
+ CHECK_EQ(ProvisionRecords.size(), 1u);
+ }
+}
+
+TEST_CASE("hub.instance_limit")
+{
+ ScopedTemporaryDirectory TempDir;
+ Hub::Configuration Config;
+ Config.InstanceLimit = 2;
+ Config.BasePortNumber = 21500;
+
+ std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config);
+
+ HubProvisionedInstanceInfo Info;
+ std::string Reason;
+
+ const bool FirstResult = HubInstance->Provision("limit_a", Info, Reason);
+ REQUIRE_MESSAGE(FirstResult, Reason);
+
+ const bool SecondResult = HubInstance->Provision("limit_b", Info, Reason);
+ REQUIRE_MESSAGE(SecondResult, Reason);
+ CHECK_EQ(HubInstance->GetInstanceCount(), 2);
+
+ Reason.clear();
+ const bool ThirdResult = HubInstance->Provision("limit_c", Info, Reason);
+ CHECK_FALSE(ThirdResult);
+ CHECK_EQ(HubInstance->GetInstanceCount(), 2);
+ CHECK_NE(Reason.find("instance limit"), std::string::npos);
+
+ HubInstance->Deprovision("limit_a", Reason);
+ CHECK_EQ(HubInstance->GetInstanceCount(), 1);
+
+ Reason.clear();
+ const bool FourthResult = HubInstance->Provision("limit_d", Info, Reason);
+ CHECK_MESSAGE(FourthResult, Reason);
+ CHECK_EQ(HubInstance->GetInstanceCount(), 2);
+}
+
+TEST_CASE("hub.deprovision_nonexistent")
+{
+ ScopedTemporaryDirectory TempDir;
+ std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path());
+
+ std::string Reason;
+ const bool Result = HubInstance->Deprovision("never_provisioned", Reason);
+ CHECK_FALSE(Result);
+ CHECK(Reason.empty());
+ CHECK_EQ(HubInstance->GetInstanceCount(), 0);
+}
+
+TEST_CASE("hub.enumerate_modules")
+{
+ ScopedTemporaryDirectory TempDir;
+ std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path());
+
+ HubProvisionedInstanceInfo Info;
+ std::string Reason;
+
+ REQUIRE_MESSAGE(HubInstance->Provision("enum_a", Info, Reason), Reason);
+ REQUIRE_MESSAGE(HubInstance->Provision("enum_b", Info, Reason), Reason);
+
+ std::vector<std::string> Ids;
+ HubInstance->EnumerateModules([&](StorageServerInstance& Instance) { Ids.push_back(std::string(Instance.GetModuleId())); });
+ CHECK_EQ(Ids.size(), 2u);
+ const bool FoundA = std::find(Ids.begin(), Ids.end(), "enum_a") != Ids.end();
+ const bool FoundB = std::find(Ids.begin(), Ids.end(), "enum_b") != Ids.end();
+ CHECK(FoundA);
+ CHECK(FoundB);
+
+ HubInstance->Deprovision("enum_a", Reason);
+ Ids.clear();
+ HubInstance->EnumerateModules([&](StorageServerInstance& Instance) { Ids.push_back(std::string(Instance.GetModuleId())); });
+ REQUIRE_EQ(Ids.size(), 1u);
+ CHECK_EQ(Ids[0], "enum_b");
+}
+
+TEST_CASE("hub.max_instance_count")
+{
+ ScopedTemporaryDirectory TempDir;
+ std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path());
+
+ CHECK_EQ(HubInstance->GetMaxInstanceCount(), 0);
+
+ HubProvisionedInstanceInfo Info;
+ std::string Reason;
+
+ REQUIRE_MESSAGE(HubInstance->Provision("max_a", Info, Reason), Reason);
+ CHECK_GE(HubInstance->GetMaxInstanceCount(), 1);
+
+ REQUIRE_MESSAGE(HubInstance->Provision("max_b", Info, Reason), Reason);
+ CHECK_GE(HubInstance->GetMaxInstanceCount(), 2);
+
+ const int MaxAfterTwo = HubInstance->GetMaxInstanceCount();
+
+ HubInstance->Deprovision("max_a", Reason);
+ CHECK_EQ(HubInstance->GetInstanceCount(), 1);
+ CHECK_EQ(HubInstance->GetMaxInstanceCount(), MaxAfterTwo);
+}
+
+TEST_CASE("hub.concurrent")
+{
+ ScopedTemporaryDirectory TempDir;
+ Hub::Configuration Config;
+ Config.BasePortNumber = 22000;
+ Config.InstanceLimit = 10;
+
+ std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config);
+
+ constexpr int kHalf = 3;
+
+ // Serially pre-provision kHalf modules
+ for (int I = 0; I < kHalf; ++I)
+ {
+ HubProvisionedInstanceInfo Info;
+ std::string Reason;
+ REQUIRE_MESSAGE(HubInstance->Provision(fmt::format("pre_{}", I), Info, Reason), Reason);
+ }
+ CHECK_EQ(HubInstance->GetInstanceCount(), kHalf);
+
+ // Simultaneously:
+ // Provisioner pool → provisions kHalf new modules ("new_0" .. "new_N")
+ // Deprovisioner pool → deprovisions the kHalf pre-provisioned modules ("pre_0" .. "pre_N")
+ // The two pools use distinct OS threads, so provisions and deprovisions are interleaved.
+
+ // Use int rather than bool to avoid std::vector<bool> bitfield packing,
+ // which would cause data races on concurrent per-index writes.
+ std::vector<int> ProvisionResults(kHalf, 0);
+ std::vector<std::string> ProvisionReasons(kHalf);
+ std::vector<int> DeprovisionResults(kHalf, 0);
+
+ {
+ WorkerThreadPool Provisioners(kHalf, "hub_test_provisioners");
+ WorkerThreadPool Deprovisioneers(kHalf, "hub_test_deprovisioneers");
+
+ std::vector<std::future<void>> ProvisionFutures(kHalf);
+ std::vector<std::future<void>> DeprovisionFutures(kHalf);
+
+ for (int I = 0; I < kHalf; ++I)
+ {
+ ProvisionFutures[I] = Provisioners.EnqueueTask(std::packaged_task<void()>([&, I] {
+ HubProvisionedInstanceInfo Info;
+ std::string Reason;
+ const bool Result =
+ HubInstance->Provision(fmt::format("new_{}", I), Info, Reason);
+ ProvisionResults[I] = Result ? 1 : 0;
+ ProvisionReasons[I] = Reason;
+ }),
+ WorkerThreadPool::EMode::EnableBacklog);
+
+ DeprovisionFutures[I] = Deprovisioneers.EnqueueTask(std::packaged_task<void()>([&, I] {
+ std::string Reason;
+ const bool Result =
+ HubInstance->Deprovision(fmt::format("pre_{}", I), Reason);
+ DeprovisionResults[I] = Result ? 1 : 0;
+ }),
+ WorkerThreadPool::EMode::EnableBacklog);
+ }
+
+ for (std::future<void>& F : ProvisionFutures)
+ {
+ F.get();
+ }
+ for (std::future<void>& F : DeprovisionFutures)
+ {
+ F.get();
+ }
+ }
+
+ for (int I = 0; I < kHalf; ++I)
+ {
+ CHECK_MESSAGE(ProvisionResults[I] != 0, ProvisionReasons[I]);
+ CHECK(DeprovisionResults[I] != 0);
+ }
+ // Only the newly provisioned modules should remain
+ CHECK_EQ(HubInstance->GetInstanceCount(), kHalf);
+}
+
+TEST_CASE("hub.concurrent_callbacks")
+{
+ ScopedTemporaryDirectory TempDir;
+ Hub::Configuration Config;
+ Config.BasePortNumber = 22300;
+ Config.InstanceLimit = 10;
+
+ struct CallbackRecord
+ {
+ std::string ModuleId;
+ uint16_t Port;
+ };
+ RwLock CallbackMutex;
+ std::vector<CallbackRecord> ProvisionCallbacks;
+ std::vector<CallbackRecord> DeprovisionCallbacks;
+
+ auto ProvisionCb = [&](std::string_view ModuleId, const HubProvisionedInstanceInfo& Info) {
+ CallbackMutex.WithExclusiveLock([&]() { ProvisionCallbacks.push_back({std::string(ModuleId), Info.Port}); });
+ };
+ auto DeprovisionCb = [&](std::string_view ModuleId, const HubProvisionedInstanceInfo& Info) {
+ CallbackMutex.WithExclusiveLock([&]() { DeprovisionCallbacks.push_back({std::string(ModuleId), Info.Port}); });
+ };
+
+ std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config, std::move(ProvisionCb), std::move(DeprovisionCb));
+
+ constexpr int kHalf = 3;
+
+ // Serially pre-provision kHalf modules and drain the resulting callbacks before the
+ // concurrent phase so we start with a clean slate.
+ for (int I = 0; I < kHalf; ++I)
+ {
+ HubProvisionedInstanceInfo Info;
+ std::string Reason;
+ REQUIRE_MESSAGE(HubInstance->Provision(fmt::format("pre_{}", I), Info, Reason), Reason);
+ }
+ CHECK_EQ(HubInstance->GetInstanceCount(), kHalf);
+
+ {
+ RwLock::ExclusiveLockScope _(CallbackMutex);
+ REQUIRE_EQ(ProvisionCallbacks.size(), static_cast<size_t>(kHalf));
+ ProvisionCallbacks.clear();
+ }
+
+ // Concurrently provision kHalf new modules while deprovisioning the pre-provisioned ones.
+ std::vector<int> ProvisionResults(kHalf, 0);
+ std::vector<std::string> ProvisionReasons(kHalf);
+ std::vector<int> DeprovisionResults(kHalf, 0);
+
+ {
+ WorkerThreadPool Provisioners(kHalf, "hub_cbtest_provisioners");
+ WorkerThreadPool Deprovisioneers(kHalf, "hub_cbtest_deprovisioneers");
+
+ std::vector<std::future<void>> ProvisionFutures(kHalf);
+ std::vector<std::future<void>> DeprovisionFutures(kHalf);
+
+ for (int I = 0; I < kHalf; ++I)
+ {
+ ProvisionFutures[I] = Provisioners.EnqueueTask(std::packaged_task<void()>([&, I] {
+ HubProvisionedInstanceInfo Info;
+ std::string Reason;
+ const bool Result =
+ HubInstance->Provision(fmt::format("new_{}", I), Info, Reason);
+ ProvisionResults[I] = Result ? 1 : 0;
+ ProvisionReasons[I] = Reason;
+ }),
+ WorkerThreadPool::EMode::EnableBacklog);
+
+ DeprovisionFutures[I] = Deprovisioneers.EnqueueTask(std::packaged_task<void()>([&, I] {
+ std::string Reason;
+ const bool Result =
+ HubInstance->Deprovision(fmt::format("pre_{}", I), Reason);
+ DeprovisionResults[I] = Result ? 1 : 0;
+ }),
+ WorkerThreadPool::EMode::EnableBacklog);
+ }
+
+ for (std::future<void>& F : ProvisionFutures)
+ {
+ F.get();
+ }
+ for (std::future<void>& F : DeprovisionFutures)
+ {
+ F.get();
+ }
+ }
+
+ // All operations must have succeeded
+ for (int I = 0; I < kHalf; ++I)
+ {
+ CHECK_MESSAGE(ProvisionResults[I] != 0, ProvisionReasons[I]);
+ CHECK(DeprovisionResults[I] != 0);
+ }
+ CHECK_EQ(HubInstance->GetInstanceCount(), kHalf);
+
+ // Each new_* module must have triggered exactly one provision callback with a non-zero port.
+ // Each pre_* module must have triggered exactly one deprovision callback with a non-zero port.
+ {
+ RwLock::SharedLockScope _(CallbackMutex);
+ REQUIRE_EQ(ProvisionCallbacks.size(), static_cast<size_t>(kHalf));
+ REQUIRE_EQ(DeprovisionCallbacks.size(), static_cast<size_t>(kHalf));
+
+ for (const CallbackRecord& Record : ProvisionCallbacks)
+ {
+ CHECK_NE(Record.Port, 0);
+ const bool IsNewModule = Record.ModuleId.rfind("new_", 0) == 0;
+ CHECK_MESSAGE(IsNewModule, Record.ModuleId);
+ }
+ for (const CallbackRecord& Record : DeprovisionCallbacks)
+ {
+ CHECK_NE(Record.Port, 0);
+ const bool IsPreModule = Record.ModuleId.rfind("pre_", 0) == 0;
+ CHECK_MESSAGE(IsPreModule, Record.ModuleId);
+ }
+ }
+}
+
+# if ZEN_PLATFORM_WINDOWS
+TEST_CASE("hub.job_object")
+{
+ SUBCASE("UseJobObject=true")
+ {
+ ScopedTemporaryDirectory TempDir;
+ Hub::Configuration Config;
+ Config.UseJobObject = true;
+ Config.BasePortNumber = 22100;
+
+ std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config);
+ HubProvisionedInstanceInfo Info;
+ std::string Reason;
+
+ const bool ProvisionResult = HubInstance->Provision("jobobj_a", Info, Reason);
+ REQUIRE_MESSAGE(ProvisionResult, Reason);
+ CHECK_NE(Info.Port, 0);
+
+ const bool DeprovisionResult = HubInstance->Deprovision("jobobj_a", Reason);
+ CHECK(DeprovisionResult);
+ CHECK_EQ(HubInstance->GetInstanceCount(), 0);
+ }
+
+ SUBCASE("UseJobObject=false")
+ {
+ ScopedTemporaryDirectory TempDir;
+ Hub::Configuration Config;
+ Config.UseJobObject = false;
+ Config.BasePortNumber = 22200;
+
+ std::unique_ptr<Hub> HubInstance = hub_testutils::MakeHub(TempDir.Path(), Config);
+ HubProvisionedInstanceInfo Info;
+ std::string Reason;
+
+ const bool ProvisionResult = HubInstance->Provision("nojobobj_a", Info, Reason);
+ REQUIRE_MESSAGE(ProvisionResult, Reason);
+ CHECK_NE(Info.Port, 0);
+
+ const bool DeprovisionResult = HubInstance->Deprovision("nojobobj_a", Reason);
+ CHECK(DeprovisionResult);
+ CHECK_EQ(HubInstance->GetInstanceCount(), 0);
+ }
+}
+# endif // ZEN_PLATFORM_WINDOWS
+
+TEST_SUITE_END();
+
+void
+hub_forcelink()
+{
+}
+
+#endif // ZEN_WITH_TESTS
} // namespace zen
diff --git a/src/zenserver/hub/hub.h b/src/zenserver/hub/hub.h
index caf8890ff..8b61f988d 100644
--- a/src/zenserver/hub/hub.h
+++ b/src/zenserver/hub/hub.h
@@ -22,33 +22,40 @@ class StorageServerInstance;
*
* Core logic for managing storage server instances on behalf of external clients.
*/
+
+struct HubProvisionedInstanceInfo
+{
+ std::string BaseUri;
+ uint16_t Port;
+};
+
class Hub
{
public:
struct Configuration
{
- std::filesystem::path HubBaseDir;
- std::filesystem::path ChildBaseDir;
-
/** Enable or disable the use of a Windows Job Object for child process management.
* When enabled, all spawned child processes are assigned to a job object with
* JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE, ensuring children are terminated if the hub
* crashes or is force-killed. Must be called before Initialize(). No-op on non-Windows.
*/
- bool UseJobObject = true;
+ bool UseJobObject = true;
+ uint16_t BasePortNumber = 21000;
+
+ int InstanceLimit = 1000;
};
- Hub(const Configuration& Config);
+
+ typedef std::function<void(std::string_view ModuleId, const HubProvisionedInstanceInfo& Info)> ProvisionModuleCallbackFunc;
+
+ Hub(const Configuration& Config,
+ ZenServerEnvironment&& RunEnvironment,
+ ProvisionModuleCallbackFunc&& ProvisionedModuleCallback = {},
+ ProvisionModuleCallbackFunc&& DeprovisionedModuleCallback = {});
~Hub();
Hub(const Hub&) = delete;
Hub& operator=(const Hub&) = delete;
- struct ProvisionedInstanceInfo
- {
- std::string BaseUri;
- uint16_t Port;
- };
-
/**
* Provision a storage server instance for the given module ID.
*
@@ -56,7 +63,7 @@ public:
* @param OutInfo If successful, information about the provisioned instance will be returned here.
* @param OutReason If unsuccessful, the reason will be returned here.
*/
- bool Provision(std::string_view ModuleId, ProvisionedInstanceInfo& OutInfo, std::string& OutReason);
+ bool Provision(std::string_view ModuleId, HubProvisionedInstanceInfo& OutInfo, std::string& OutReason);
/**
* Deprovision a storage server instance for the given module ID.
@@ -90,13 +97,20 @@ public:
int GetInstanceCount();
- int GetInstanceLimit() const { return m_InstanceLimit; }
int GetMaxInstanceCount() const { return m_MaxInstanceCount; }
+ const Configuration& GetConfig() const { return m_Config; }
+
private:
- ZenServerEnvironment m_RunEnvironment;
+ const Configuration m_Config;
+ ZenServerEnvironment m_RunEnvironment;
+
+ ProvisionModuleCallbackFunc m_ProvisionedModuleCallback;
+ ProvisionModuleCallbackFunc m_DeprovisionedModuleCallback;
+
std::filesystem::path m_FileHydrationPath;
std::filesystem::path m_HydrationTempPath;
+
#if ZEN_PLATFORM_WINDOWS
JobObject m_JobObject;
#endif
@@ -104,15 +118,17 @@ private:
std::unordered_map<std::string, std::unique_ptr<StorageServerInstance>> m_Instances;
std::unordered_set<std::string> m_DeprovisioningModules;
std::unordered_set<std::string> m_ProvisioningModules;
- int m_MaxInstanceCount = 0;
- int m_InstanceLimit = 1000;
ResourceMetrics m_ResourceLimits;
SystemMetrics m_HostMetrics;
- bool m_UseJobObject = true;
+ int m_MaxInstanceCount = 0;
void UpdateStats();
void UpdateCapacityMetrics();
bool CanProvisionInstance(std::string_view ModuleId, std::string& OutReason);
};
+#if ZEN_WITH_TESTS
+void hub_forcelink();
+#endif // ZEN_WITH_TESTS
+
} // namespace zen
diff --git a/src/zenserver/hub/zenhubserver.cpp b/src/zenserver/hub/zenhubserver.cpp
index 64ab0bd9e..a019ff295 100644
--- a/src/zenserver/hub/zenhubserver.cpp
+++ b/src/zenserver/hub/zenhubserver.cpp
@@ -3,9 +3,10 @@
#include "zenhubserver.h"
#include "frontend/frontend.h"
+#include "httphubservice.h"
#include "hub.h"
-#include "hubservice.h"
+#include <zencore/config.h>
#include <zencore/fmtutils.h>
#include <zencore/memory/llm.h>
#include <zencore/memory/memorytrace.h>
@@ -38,6 +39,35 @@ ZenHubServerConfigurator::AddCliOptions(cxxopts::Options& Options)
"Instance ID for use in notifications",
cxxopts::value<std::string>(m_ServerOptions.InstanceId)->default_value(""),
"");
+
+ Options.add_option("hub",
+ "",
+ "consul-endpoint",
+ "Consul endpoint URL for service registration (empty = disabled)",
+ cxxopts::value<std::string>(m_ServerOptions.ConsulEndpoint)->default_value(""),
+ "");
+
+ Options.add_option("hub",
+ "",
+ "hub-base-port-number",
+ "Base port number for provisioned instances",
+ cxxopts::value<uint16_t>(m_ServerOptions.HubBasePortNumber)->default_value("21000"),
+ "");
+
+ Options.add_option("hub",
+ "",
+ "hub-instance-limit",
+ "Maximum number of provisioned instances for this hub",
+ cxxopts::value<int>(m_ServerOptions.HubInstanceLimit)->default_value("1000"),
+ "");
+#if ZEN_PLATFORM_WINDOWS
+ Options.add_option("hub",
+ "",
+ "hub-use-job-object",
+ "Enable the use of a Windows Job Object for child process management",
+ cxxopts::value<bool>(m_ServerOptions.HubUseJobObject)->default_value("true"),
+ "");
+#endif // ZEN_PLATFORM_WINDOWS
}
void
@@ -74,6 +104,56 @@ ZenHubServer::~ZenHubServer()
Cleanup();
}
+void
+ZenHubServer::OnProvisioned(std::string_view HubInstanceId, std::string_view ModuleId, const HubProvisionedInstanceInfo& Info)
+{
+ if (m_ConsulClient)
+ {
+ consul::ServiceRegistrationInfo ServiceInfo{
+ .ServiceId = std::string(ModuleId),
+ .ServiceName = "zen-storage",
+ // .Address = "localhost", // Let the consul agent figure out out external address // TODO: Info.BaseUri?
+ .Port = Info.Port,
+ .HealthEndpoint = "health",
+ .Tags = std::vector<std::pair<std::string, std::string>>{std::make_pair("module", std::string(ModuleId)),
+ std::make_pair("zen-hub", std::string(HubInstanceId)),
+ std::make_pair("version", std::string(ZEN_CFG_VERSION))},
+ .HealthIntervalSeconds = 10,
+ .DeregisterAfterSeconds = 30};
+
+ if (!m_ConsulClient->RegisterService(ServiceInfo))
+ {
+ ZEN_WARN("Failed to register storage server instance for module '{}' with Consul, continuing anyway", ModuleId);
+ }
+ else
+ {
+ ZEN_INFO("Registered storage server instance for module '{}' at port {} with Consul as '{}'",
+ ModuleId,
+ Info.Port,
+ ServiceInfo.ServiceName);
+ }
+ }
+}
+
+void
+ZenHubServer::OnDeprovisioned(std::string_view HubInstanceId, std::string_view ModuleId, const HubProvisionedInstanceInfo& Info)
+{
+ ZEN_UNUSED(HubInstanceId);
+ if (m_ConsulClient)
+ {
+ if (!m_ConsulClient->DeregisterService(ModuleId))
+ {
+ ZEN_WARN("Failed to deregister storage server instance for module '{}' at port {} from Consul, continuing anyway",
+ ModuleId,
+ Info.Port);
+ }
+ else
+ {
+ ZEN_INFO("Deregistered storage server instance for module '{}' at port {} from Consul", ModuleId, Info.Port);
+ }
+ }
+}
+
int
ZenHubServer::Initialize(const ZenHubServerConfig& ServerConfig, ZenServerState::ZenServerEntry* ServerEntry)
{
@@ -96,6 +176,7 @@ ZenHubServer::Initialize(const ZenHubServerConfig& ServerConfig, ZenServerState:
m_DebugOptionForcedCrash = ServerConfig.ShouldCrash;
InitializeState(ServerConfig);
+ InitializeConsulRegistration(ServerConfig, EffectiveBasePort);
InitializeServices(ServerConfig);
RegisterServices(ServerConfig);
@@ -127,6 +208,9 @@ ZenHubServer::Cleanup()
m_HubService.reset();
m_ApiService.reset();
m_Hub.reset();
+
+ m_ConsulRegistration.reset();
+ m_ConsulClient.reset();
}
catch (const std::exception& Ex)
{
@@ -147,7 +231,18 @@ ZenHubServer::InitializeServices(const ZenHubServerConfig& ServerConfig)
ZEN_INFO("instantiating Hub");
m_Hub = std::make_unique<Hub>(
- Hub::Configuration{.HubBaseDir = ServerConfig.DataDir / "hub", .ChildBaseDir = ServerConfig.DataDir / "servers"});
+ Hub::Configuration{.UseJobObject = ServerConfig.HubUseJobObject,
+ .BasePortNumber = ServerConfig.HubBasePortNumber,
+ .InstanceLimit = ServerConfig.HubInstanceLimit},
+ ZenServerEnvironment(ZenServerEnvironment::Hub, ServerConfig.DataDir / "hub", ServerConfig.DataDir / "servers"),
+ m_ConsulClient ? [this, HubInstanceId = fmt::format("zen-hub-{}", ServerConfig.InstanceId)](
+ std::string_view ModuleId,
+ const HubProvisionedInstanceInfo& Info) { OnProvisioned(HubInstanceId, ModuleId, Info); }
+ : Hub::ProvisionModuleCallbackFunc{},
+ m_ConsulClient ? [this, HubInstanceId = fmt::format("zen-hub-{}", ServerConfig.InstanceId)](
+ std::string_view ModuleId,
+ const HubProvisionedInstanceInfo& Info) { OnDeprovisioned(HubInstanceId, ModuleId, Info); }
+ : Hub::ProvisionModuleCallbackFunc{});
ZEN_INFO("instantiating API service");
m_ApiService = std::make_unique<zen::HttpApiService>(*m_Http);
@@ -181,6 +276,47 @@ ZenHubServer::RegisterServices(const ZenHubServerConfig& ServerConfig)
}
void
+ZenHubServer::InitializeConsulRegistration(const ZenHubServerConfig& ServerConfig, int EffectivePort)
+{
+ if (ServerConfig.ConsulEndpoint.empty())
+ {
+ ZEN_INFO("Consul registration disabled (no endpoint configured)");
+ return;
+ }
+
+ ZEN_INFO("Initializing Consul registration with endpoint: {}", ServerConfig.ConsulEndpoint);
+
+ try
+ {
+ m_ConsulClient = std::make_unique<consul::ConsulClient>(ServerConfig.ConsulEndpoint);
+
+ consul::ServiceRegistrationInfo Info;
+ Info.ServiceId = fmt::format("zen-hub-{}", ServerConfig.InstanceId);
+ Info.ServiceName = "zen-hub";
+ // Info.Address = "localhost"; // Let the consul agent figure out out external address // TODO: Info.BaseUri?
+ Info.Port = static_cast<uint16_t>(EffectivePort);
+ Info.HealthEndpoint = "hub/health";
+ Info.Tags = std::vector<std::pair<std::string, std::string>>{
+ std::make_pair("zen-hub", Info.ServiceId),
+ std::make_pair("version", std::string(ZEN_CFG_VERSION)),
+ std::make_pair("base-port-number", fmt::format("{}", ServerConfig.HubBasePortNumber)),
+ std::make_pair("instance-limit", fmt::format("{}", ServerConfig.HubInstanceLimit)),
+ std::make_pair("use-job-object", fmt::format("{}", ServerConfig.HubUseJobObject))};
+
+ m_ConsulRegistration = std::make_unique<consul::ServiceRegistration>(m_ConsulClient.get(), Info);
+
+ ZEN_INFO("Consul service registration initiated for service ID: {}", Info.ServiceId);
+ }
+ catch (const std::exception& Ex)
+ {
+ // REQ-F-12: Hub should start successfully even if Consul registration fails
+ ZEN_WARN("Failed to initialize Consul registration (hub will continue without it): {}", Ex.what());
+ m_ConsulRegistration.reset();
+ m_ConsulClient.reset();
+ }
+}
+
+void
ZenHubServer::Run()
{
if (m_ProcessMonitor.IsActive())
@@ -228,6 +364,21 @@ ZenHubServer::Run()
const bool IsInteractiveMode = IsInteractiveSession(); // &&!m_TestMode;
+ if (m_ConsulRegistration)
+ {
+ if (!m_ConsulRegistration->IsRegistered())
+ {
+ ZEN_INFO("Waiting for consul integration to register...");
+ m_ConsulRegistration->WaitForReadyEvent(2000);
+ }
+ if (!m_ConsulRegistration->IsRegistered())
+ {
+ m_ConsulClient.reset();
+ m_ConsulRegistration.reset();
+ ZEN_WARN("Consul registration failed, running without consul integration");
+ }
+ }
+
SetNewState(kRunning);
OnReady();
diff --git a/src/zenserver/hub/zenhubserver.h b/src/zenserver/hub/zenhubserver.h
index a3eeb2ead..f6a3eb1bc 100644
--- a/src/zenserver/hub/zenhubserver.h
+++ b/src/zenserver/hub/zenhubserver.h
@@ -4,6 +4,8 @@
#include "zenserver.h"
+#include <zenutil/consul.h>
+
namespace cxxopts {
class Options;
}
@@ -20,10 +22,15 @@ class HttpHubService;
struct ZenHubServerConfig : public ZenServerConfig
{
std::string UpstreamNotificationEndpoint;
- std::string InstanceId; // For use in notifications
+ std::string InstanceId; // For use in notifications
+ std::string ConsulEndpoint; // If set, enables Consul service registration
+ uint16_t HubBasePortNumber = 21000;
+ int HubInstanceLimit = 1000;
+ bool HubUseJobObject = true;
};
class Hub;
+struct HubProvisionedInstanceInfo;
struct ZenHubServerConfigurator : public ZenServerConfiguratorBase
{
@@ -78,6 +85,9 @@ public:
void SetContentRoot(std::filesystem::path Root) { m_ContentRoot = Root; }
private:
+ void OnProvisioned(std::string_view HubInstanceId, std::string_view ModuleId, const HubProvisionedInstanceInfo& Info);
+ void OnDeprovisioned(std::string_view HubInstanceId, std::string_view ModuleId, const HubProvisionedInstanceInfo& Info);
+
bool m_IsDedicatedMode = false;
bool m_TestMode = false;
std::filesystem::path m_DataRoot;
@@ -90,9 +100,13 @@ private:
std::unique_ptr<HttpApiService> m_ApiService;
std::unique_ptr<HttpFrontendService> m_FrontendService;
+ std::unique_ptr<consul::ConsulClient> m_ConsulClient;
+ std::unique_ptr<consul::ServiceRegistration> m_ConsulRegistration;
+
void InitializeState(const ZenHubServerConfig& ServerConfig);
void InitializeServices(const ZenHubServerConfig& ServerConfig);
void RegisterServices(const ZenHubServerConfig& ServerConfig);
+ void InitializeConsulRegistration(const ZenHubServerConfig& ServerConfig, int EffectivePort);
};
} // namespace zen