aboutsummaryrefslogtreecommitdiff
path: root/src/zenutil/consul/consul.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/zenutil/consul/consul.cpp')
-rw-r--r--src/zenutil/consul/consul.cpp445
1 files changed, 428 insertions, 17 deletions
diff --git a/src/zenutil/consul/consul.cpp b/src/zenutil/consul/consul.cpp
index c9144e589..762f06817 100644
--- a/src/zenutil/consul/consul.cpp
+++ b/src/zenutil/consul/consul.cpp
@@ -9,10 +9,18 @@
#include <zencore/logging.h>
#include <zencore/process.h>
#include <zencore/string.h>
+#include <zencore/testing.h>
+#include <zencore/testutils.h>
#include <zencore/thread.h>
#include <zencore/timer.h>
+#include <zenhttp/httpserver.h>
+
+#include <unordered_set>
+
+ZEN_THIRD_PARTY_INCLUDES_START
#include <fmt/format.h>
+ZEN_THIRD_PARTY_INCLUDES_END
namespace zen::consul {
@@ -31,7 +39,7 @@ struct ConsulProcess::Impl
}
CreateProcOptions Options;
- Options.Flags |= CreateProcOptions::Flag_Windows_NewProcessGroup;
+ Options.Flags |= CreateProcOptions::Flag_NewProcessGroup;
const std::filesystem::path ConsulExe = GetRunningExecutablePath().parent_path() / ("consul" ZEN_EXE_SUFFIX_LITERAL);
CreateProcResult Result = CreateProc(ConsulExe, "consul" ZEN_EXE_SUFFIX_LITERAL " agent -dev", Options);
@@ -107,12 +115,30 @@ ConsulProcess::StopConsulAgent()
//////////////////////////////////////////////////////////////////////////
-ConsulClient::ConsulClient(std::string_view BaseUri, std::string_view Token) : m_Token(Token), m_HttpClient(BaseUri)
+ConsulClient::ConsulClient(const Configuration& Config)  // Stores Config, builds the HTTP client from it and starts the worker thread.
+: m_Config(Config)
+, m_HttpClient(m_Config.BaseUri, HttpClientSettings{.ConnectTimeout = m_Config.ConnectTimeout, .Timeout = m_Config.Timeout}, [this] {
+ return m_Stop.load();  // Callback given to HttpClient: reports the stop flag (presumably polled to abort requests on shutdown - confirm).
+})
{
+ m_Worker = std::thread(&ConsulClient::WorkerLoop, this);  // Started in the body, i.e. after every member has been initialized.
}
ConsulClient::~ConsulClient()
{
+ try  // Shuts the worker down; a std::exception raised here is logged below rather than escaping the destructor.
+ {
+ m_Stop.store(true);  // Set before signalling: WorkerLoop reads m_Stop right after it wakes up.
+ m_Wakeup.Set();  // Wake WorkerLoop, which blocks on m_Wakeup.
+ if (m_Worker.joinable())
+ {
+ m_Worker.join();  // Wait for the worker's final pass (it still runs queued deregistrations while stopping).
+ }
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_WARN("ConsulClient::~ConsulClient threw exception: {}", Ex.what());
+ }
}
void
@@ -158,9 +184,27 @@ ConsulClient::DeleteKey(std::string_view Key)
}
}
-bool
+void  // Asynchronous: only enqueues the request; WorkerLoop performs the registration via DoRegister, so no result is returned.
ConsulClient::RegisterService(const ServiceRegistrationInfo& Info)
{
+ PendingOp Op{PendingOp::Kind::Register, Info};  // Info is stored inside the queued op.
+ m_QueueLock.WithExclusiveLock([&] { m_Queue.push_back(std::move(Op)); });  // m_Queue is shared with the worker thread.
+ m_Wakeup.Set();  // Signal the worker that an op is pending.
+}
+
+void  // Asynchronous: enqueues a deregister request; WorkerLoop acts on it only if it registered this ServiceId earlier.
+ConsulClient::DeregisterService(std::string_view ServiceId)
+{
+ PendingOp Op;
+ Op.Type = PendingOp::Kind::Deregister;
+ Op.Info.ServiceId = std::string(ServiceId);  // Only ServiceId is set; it is the only field the worker reads for a deregister.
+ m_QueueLock.WithExclusiveLock([&] { m_Queue.push_back(std::move(Op)); });  // m_Queue is shared with the worker thread.
+ m_Wakeup.Set();  // Signal the worker that an op is pending.
+}
+
+bool
+ConsulClient::DoRegister(const ServiceRegistrationInfo& Info)
+{
using namespace std::literals;
HttpClient::KeyValueMap AdditionalHeaders;
@@ -193,12 +237,18 @@ ConsulClient::RegisterService(const ServiceRegistrationInfo& Info)
// when no interval is configured (e.g. during Provisioning).
Writer.BeginObject("Check"sv);
{
- Writer.AddString("HTTP"sv, fmt::format("http://{}:{}/{}", Info.Address, Info.Port, Info.HealthEndpoint));
+ Writer.AddString(
+ "HTTP"sv,
+ fmt::format("http://{}:{}/{}", Info.Address.empty() ? "localhost" : Info.Address, Info.Port, Info.HealthEndpoint));
Writer.AddString("Interval"sv, fmt::format("{}s", Info.HealthIntervalSeconds));
if (Info.DeregisterAfterSeconds != 0)
{
Writer.AddString("DeregisterCriticalServiceAfter"sv, fmt::format("{}s", Info.DeregisterAfterSeconds));
}
+ if (!Info.InitialStatus.empty())
+ {
+ Writer.AddString("Status"sv, Info.InitialStatus);
+ }
}
Writer.EndObject(); // Check
}
@@ -213,7 +263,7 @@ ConsulClient::RegisterService(const ServiceRegistrationInfo& Info)
if (!Result)
{
- ZEN_WARN("ConsulClient::RegisterService() failed to register service '{}' ({})", Info.ServiceId, Result.ErrorMessage(""));
+ ZEN_WARN("ConsulClient::DoRegister() failed to register service '{}' ({})", Info.ServiceId, Result.ErrorMessage(""));
return false;
}
@@ -221,29 +271,114 @@ ConsulClient::RegisterService(const ServiceRegistrationInfo& Info)
}
bool
-ConsulClient::DeregisterService(std::string_view ServiceId)
+ConsulClient::DoDeregister(std::string_view ServiceId)  // Synchronous deregister: agent endpoint first, catalog endpoint as fallback. Returns true if either succeeds.
{
+ using namespace std::literals;
+
HttpClient::KeyValueMap AdditionalHeaders;
ApplyCommonHeaders(AdditionalHeaders);
AdditionalHeaders.Entries.emplace(HttpClient::Accept(HttpContentType::kJSON));
- HttpClient::Response Result = m_HttpClient.Put(fmt::format("v1/agent/service/deregister/{}", ServiceId), AdditionalHeaders);
+ HttpClient::Response Result = m_HttpClient.Put(fmt::format("v1/agent/service/deregister/{}", ServiceId), IoBuffer{}, AdditionalHeaders);
+ if (Result)
+ {
+ return true;  // Agent-level deregister succeeded; no fallback needed.
+ }
+
+ // Agent deregister failed - fall back to catalog deregister.
+ // This handles cases where the service was registered via a different Consul agent
+ // (e.g. load-balanced endpoint routing to different agents).
+ std::string NodeName = GetNodeName();  // Issues one more request (GET v1/agent/self); empty on any failure.
+ if (!NodeName.empty())
+ {
+ CbObjectWriter Writer;
+ Writer.AddString("Node"sv, NodeName);
+ Writer.AddString("ServiceID"sv, ServiceId);
+
+ ExtendableStringBuilder<256> SB;
+ CompactBinaryToJson(Writer.Save(), SB);
+
+ IoBuffer PayloadBuffer(IoBuffer::Wrap, SB.Data(), SB.Size());  // Presumably references SB's storage without copying, so SB must outlive the Put below - confirm.
+ PayloadBuffer.SetContentType(HttpContentType::kJSON);
+
+ HttpClient::Response CatalogResult = m_HttpClient.Put("v1/catalog/deregister", PayloadBuffer, AdditionalHeaders);  // Reuses the headers built for the agent call.
+ if (CatalogResult)
+ {
+ ZEN_INFO("ConsulClient::DoDeregister() deregistered service '{}' via catalog fallback (agent error: {})",
+ ServiceId,
+ Result.ErrorMessage(""));
+ return true;
+ }
+
+ ZEN_WARN("ConsulClient::DoDeregister() failed to deregister service '{}' (agent: {}, catalog: {})",
+ ServiceId,
+ Result.ErrorMessage(""),
+ CatalogResult.ErrorMessage(""));
+ }
+ else
+ {
+ ZEN_WARN(
+ "ConsulClient::DoDeregister() failed to deregister service '{}' (agent: {}, could not determine node name for catalog "
+ "fallback)",
+ ServiceId,
+ Result.ErrorMessage(""));
+ }
+
+ return false;  // Both the agent call and the catalog fallback failed (or the fallback could not be attempted).
+}
+std::string  // Returns the agent's Config.NodeName as reported by v1/agent/self, or an empty string on any failure.
+ConsulClient::GetNodeName()
+{
+ using namespace std::literals;
+
+ HttpClient::KeyValueMap AdditionalHeaders;
+ ApplyCommonHeaders(AdditionalHeaders);
+
+ HttpClient::Response Result = m_HttpClient.Get("v1/agent/self", AdditionalHeaders);
if (!Result)
{
- ZEN_WARN("ConsulClient::DeregisterService() failed to deregister service '{}' ({})", ServiceId, Result.ErrorMessage(""));
- return false;
+ return {};  // Request failed; not logged here, callers report the empty name themselves.
}
- return true;
+ std::string JsonError;
+ CbFieldIterator Root = LoadCompactBinaryFromJson(Result.AsText(), JsonError);
+ if (!Root || !JsonError.empty())
+ {
+ return {};  // Response body could not be parsed as JSON.
+ }
+
+ for (CbFieldView Field : Root)  // Scan the parsed fields for the one named "Config".
+ {
+ if (Field.GetName() == "Config"sv)
+ {
+ CbObjectView Config = Field.AsObjectView();
+ if (Config)
+ {
+ return std::string(Config["NodeName"sv].AsString());  // Presumably empty if "NodeName" is missing or not a string - confirm.
+ }
+ }
+ }
+
+ return {};  // No usable "Config" object in the response.
}
void
ConsulClient::ApplyCommonHeaders(HttpClient::KeyValueMap& InOutHeaderMap)
{
- if (!m_Token.empty())
+ std::string Token;  // Resolved on every call: the static token wins, otherwise the configured environment variable is read.
+ if (!m_Config.StaticToken.empty())
+ {
+ Token = m_Config.StaticToken;
+ }
+ else if (!m_Config.TokenEnvName.empty())
+ {
+ Token = GetEnvVariable(m_Config.TokenEnvName);  // Looked up each time rather than cached.
+ }
+
+ if (!Token.empty())  // No token resolved: the X-Consul-Token header is not added.
{
- InOutHeaderMap.Entries.emplace("X-Consul-Token", m_Token);
+ InOutHeaderMap.Entries.emplace("X-Consul-Token", Token);
}
}
@@ -295,8 +430,10 @@ ConsulClient::WatchService(std::string_view ServiceId, uint64_t& InOutIndex, int
HttpClient::KeyValueMap AdditionalHeaders;
ApplyCommonHeaders(AdditionalHeaders);
- // Note: m_HttpClient uses unlimited HTTP timeout (Timeout{0}); the WaitSeconds parameter
- // governs the server-side bound on the blocking query. Do not add a separate client timeout.
+ // Note: m_HttpClient runs with the client-side timeout taken from m_Config.Timeout, which is
+ // meant to be short (e.g. 500ms) to keep Register/Deregister from stalling the hub state machine
+ // when the agent is unreachable. That bound applies here too: WaitSeconds is effectively capped
+ // at the configured client timeout regardless of the argument, so callers must treat this as a
+ // short-poll and loop rather than rely on a true blocking query.
HttpClient::KeyValueMap Parameters({{"index", std::to_string(InOutIndex)}, {"wait", fmt::format("{}s", WaitSeconds)}});
HttpClient::Response Result = m_HttpClient.Get("v1/agent/services", AdditionalHeaders, Parameters);
if (!Result)
@@ -345,6 +482,82 @@ ConsulClient::GetAgentChecksJson()
return Result.ToText();
}
+void  // Body of the worker thread started by the constructor: waits on m_Wakeup, drains m_Queue and runs each op until m_Stop is set.
+ConsulClient::WorkerLoop()
+{
+ SetCurrentThreadName("ConsulClient");
+
+ std::unordered_set<std::string> RegisteredServices;  // Service ids this worker registered successfully; only touched by this thread.
+
+ while (true)
+ {
+ m_Wakeup.Wait(-1);  // Block until signalled (-1 presumably means no timeout - confirm).
+ m_Wakeup.Reset();  // Clear the signal before draining; ops queued after this point re-signal and are handled by this or the next pass.
+
+ const bool Stopping = m_Stop.load();  // Sampled once per pass; the destructor sets m_Stop before signalling.
+
+ std::vector<PendingOp> Batch;
+ m_QueueLock.WithExclusiveLock([&] { Batch.swap(m_Queue); });  // Take the whole queue under the lock, then process it without holding the lock.
+
+ for (size_t Index = 0; Index < Batch.size(); ++Index)
+ {
+ PendingOp& Op = Batch[Index];
+
+ if (Stopping && Op.Type == PendingOp::Kind::Register)
+ {
+ continue;  // During shutdown pending registrations are skipped; only deregistrations still run.
+ }
+
+ const std::string_view OpName = (Op.Type == PendingOp::Kind::Register) ? "register" : "deregister";
+
+ try
+ {
+ if (Op.Type == PendingOp::Kind::Register)
+ {
+ bool Ok = DoRegister(Op.Info);
+ if (Ok)
+ {
+ RegisteredServices.insert(Op.Info.ServiceId);
+ }
+ else
+ {
+ const size_t Remaining = Batch.size() - Index - 1;
+ ZEN_WARN("ConsulClient worker: {} for '{}' failed; dropping {} remaining queued op(s)",
+ OpName,
+ Op.Info.ServiceId,
+ Remaining);
+ break;  // NOTE(review): this also discards queued deregistrations for services registered earlier - confirm that is intended.
+ }
+ }
+ else
+ {
+ ZEN_ASSERT(Op.Type == PendingOp::Kind::Deregister);
+ if (RegisteredServices.erase(Op.Info.ServiceId) == 1u)  // Ids this worker never registered are ignored; the id is forgotten even if DoDeregister fails.
+ {
+ if (!DoDeregister(Op.Info.ServiceId))
+ {
+ ZEN_WARN("ConsulClient worker: {} for '{}' failed", OpName, Op.Info.ServiceId);
+ }
+ }
+ }
+ }
+ catch (const std::exception& Ex)  // An exception from one op is logged and the loop moves on to the next op.
+ {
+ ZEN_WARN("ConsulClient worker: {} for '{}' threw: {}", OpName, Op.Info.ServiceId, Ex.what());
+ }
+ catch (...)
+ {
+ ZEN_WARN("ConsulClient worker: {} for '{}' threw unknown exception", OpName, Op.Info.ServiceId);
+ }
+ }
+
+ if (Stopping)
+ {
+ break;  // Exit after the final drain.
+ }
+ }
+}
+
//////////////////////////////////////////////////////////////////////////
ServiceRegistration::ServiceRegistration(ConsulClient* Client, const ServiceRegistrationInfo& Info) : m_Client(Client), m_Info(Info)
@@ -365,7 +578,7 @@ ServiceRegistration::~ServiceRegistration()
if (m_IsRegistered.load())
{
- if (!m_Client->DeregisterService(m_Info.ServiceId))
+ if (!m_Client->DoDeregister(m_Info.ServiceId))
{
ZEN_WARN("ServiceRegistration: Failed to deregister service '{}' during cleanup", m_Info.ServiceId);
}
@@ -405,7 +618,7 @@ ServiceRegistration::RegistrationLoop()
// Try to register with exponential backoff
for (int Attempt = 0; Attempt < MaxAttempts; ++Attempt)
{
- if (m_Client->RegisterService(m_Info))
+ if (m_Client->DoRegister(m_Info))
{
Succeeded = true;
break;
@@ -417,7 +630,7 @@ ServiceRegistration::RegistrationLoop()
}
}
- if (Succeeded || m_Client->RegisterService(m_Info))
+ if (Succeeded || m_Client->DoRegister(m_Info))
{
break;
}
@@ -446,4 +659,202 @@ ServiceRegistration::RegistrationLoop()
}
}
+//////////////////////////////////////////////////////////////////////////
+// Tests
+
+#if ZEN_WITH_TESTS
+
+void  // Intentionally empty; presumably referenced from elsewhere so this translation unit and its tests get linked - confirm.
+consul_forcelink()
+{
+}
+
+struct MockHealthService : public HttpService  // Test double: answers "health" requests and counts them.
+{
+ std::atomic<bool> FailHealth{false};  // When true, health requests are answered with ServiceUnavailable instead of OK.
+ std::atomic<int> HealthCheckCount{0};  // Number of health requests received, counted whether or not they are failed.
+
+ const char* BaseUri() const override { return "/"; }
+
+ void HandleRequest(HttpServerRequest& Request) override
+ {
+ std::string_view Uri = Request.RelativeUri();
+ if (Uri == "health/" || Uri == "health")  // Accept the endpoint with or without a trailing slash.
+ {
+ HealthCheckCount.fetch_add(1);
+ if (FailHealth.load())
+ {
+ Request.WriteResponse(HttpResponseCode::ServiceUnavailable);
+ }
+ else
+ {
+ Request.WriteResponse(HttpResponseCode::OK, HttpContentType::kText, "ok");
+ }
+ return;
+ }
+ Request.WriteResponse(HttpResponseCode::NotFound);  // Any other URI.
+ }
+};
+
+struct TestHealthServer  // Owns an HTTP server hosting MockHealthService on a background thread; stopped and joined in the destructor.
+{
+ MockHealthService Mock;
+
+ void Start()
+ {
+ m_TmpDir.emplace();
+ m_Server = CreateHttpServer(HttpServerConfig{.ServerClass = "asio"});
+ m_Port = m_Server->Initialize(0, m_TmpDir->Path() / "http");  // Port 0 presumably asks for an ephemeral port; the returned value is kept - confirm.
+ REQUIRE(m_Port != -1);
+ m_Server->RegisterService(Mock);
+ m_ServerThread = std::thread([this]() { m_Server->Run(false); });  // Serve requests off the test thread.
+ }
+
+ int Port() const { return m_Port; }  // -1 until Start() has initialized the server.
+
+ ~TestHealthServer()
+ {
+ if (m_Server)
+ {
+ m_Server->RequestExit();  // Ask Run() to return so the join below can complete.
+ }
+ if (m_ServerThread.joinable())
+ {
+ m_ServerThread.join();
+ }
+ if (m_Server)
+ {
+ m_Server->Close();  // Closed only after the server thread has exited.
+ }
+ }
+
+private:
+ std::optional<ScopedTemporaryDirectory> m_TmpDir;
+ Ref<HttpServer> m_Server;
+ std::thread m_ServerThread;
+ int m_Port = -1;
+};
+
+static bool  // Polls Predicate every PollIntervalMs until it returns true or TimeoutMs has elapsed; returns whether it ever held.
+WaitForCondition(std::function<bool()> Predicate, int TimeoutMs, int PollIntervalMs = 200)
+{
+ Stopwatch Timer;
+ while (Timer.GetElapsedTimeMs() < static_cast<uint64_t>(TimeoutMs))  // NOTE(review): a negative TimeoutMs converts to a very large unsigned value.
+ {
+ if (Predicate())
+ {
+ return true;
+ }
+ Sleep(PollIntervalMs);
+ }
+ return Predicate();  // One last check so a condition that became true during the final sleep is not missed.
+}
+
+static std::string  // Returns the "Status" of the first agent check whose "ServiceID" equals ServiceId, or an empty string if none is found.
+GetCheckStatus(ConsulClient& Client, std::string_view ServiceId)
+{
+ using namespace std::literals;
+
+ std::string JsonError;
+ CbFieldIterator ChecksRoot = LoadCompactBinaryFromJson(Client.GetAgentChecksJson(), JsonError);
+ if (!ChecksRoot || !JsonError.empty())
+ {
+ return {};  // Checks JSON missing or unparseable.
+ }
+
+ for (CbFieldView F : ChecksRoot)
+ {
+ if (!F.IsObject())
+ {
+ continue;
+ }
+ for (CbFieldView C : F.AsObjectView())  // Each field of the object is treated as one check entry.
+ {
+ CbObjectView Check = C.AsObjectView();
+ if (Check["ServiceID"sv].AsString() == ServiceId)
+ {
+ return std::string(Check["Status"sv].AsString());
+ }
+ }
+ }
+ return {};  // No check references ServiceId.
+}
+
+TEST_SUITE_BEGIN("util.consul");
+
+TEST_CASE("util.consul.service_lifecycle")  // End-to-end test: spawns a Consul agent and a mock health server, then exercises register/deregister.
+{
+ ConsulProcess ConsulProc;
+ ConsulProc.SpawnConsulAgent();
+
+ TestHealthServer HealthServer;
+ HealthServer.Start();
+
+ ConsulClient Client(
+ {.BaseUri = "http://localhost:8500/", .ConnectTimeout = std::chrono::seconds{5}, .Timeout = std::chrono::seconds{5}});
+
+ const std::string ServiceId = "test-health-svc";
+
+ ServiceRegistrationInfo Info;
+ Info.ServiceId = ServiceId;
+ Info.ServiceName = "zen-test-health";
+ Info.Address = "127.0.0.1";
+ Info.Port = static_cast<uint16_t>(HealthServer.Port());
+ Info.HealthEndpoint = "health/";
+ Info.HealthIntervalSeconds = 1;  // Short interval so health checks arrive well within the 10s waits below.
+ Info.DeregisterAfterSeconds = 60;
+
+ // Register/Deregister are async; wait for the worker to propagate to Consul.
+
+ // Phase 1: Register and verify Consul sends health checks to our service
+ Client.RegisterService(Info);
+ REQUIRE(WaitForCondition([&]() { return Client.HasService(ServiceId); }, 10000, 50));
+
+ REQUIRE(WaitForCondition([&]() { return HealthServer.Mock.HealthCheckCount.load() >= 1; }, 10000, 50));
+ CHECK(HealthServer.Mock.HealthCheckCount.load() >= 1);
+ CHECK_EQ(GetCheckStatus(Client, ServiceId), "passing");
+
+ // Phase 2: Explicit deregister
+ Client.DeregisterService(ServiceId);
+ REQUIRE(WaitForCondition([&]() { return !Client.HasService(ServiceId); }, 10000, 50));
+
+ // Phase 3: Register with InitialStatus and verify the check reports passing, then fail health
+ // and verify the check goes critical
+ HealthServer.Mock.HealthCheckCount.store(0);
+ HealthServer.Mock.FailHealth.store(false);
+
+ Info.InitialStatus = "passing";
+ Client.RegisterService(Info);
+ REQUIRE(WaitForCondition([&]() { return Client.HasService(ServiceId); }, 10000, 50));
+
+ // Registration is async; by the time HasService observes the service the 1s health interval
+ // may already have fired, so we can't robustly assert HealthCheckCount==0. The "passing" status
+ // below can therefore come from either InitialStatus or an already-completed successful health
+ // check; both are accepted here, so this is not a strict proof that InitialStatus was applied.
+ CHECK_EQ(GetCheckStatus(Client, ServiceId), "passing");
+
+ REQUIRE(WaitForCondition([&]() { return HealthServer.Mock.HealthCheckCount.load() >= 1; }, 10000, 50));
+ CHECK_EQ(GetCheckStatus(Client, ServiceId), "passing");
+
+ HealthServer.Mock.FailHealth.store(true);  // From now on the mock answers health requests with ServiceUnavailable.
+
+ // Wait for Consul to observe the failing check
+ REQUIRE(WaitForCondition([&]() { return GetCheckStatus(Client, ServiceId) == "critical"; }, 10000, 50));
+ CHECK_EQ(GetCheckStatus(Client, ServiceId), "critical");
+
+ // Phase 4: Explicit deregister while critical
+ Client.DeregisterService(ServiceId);
+ REQUIRE(WaitForCondition([&]() { return !Client.HasService(ServiceId); }, 10000, 50));
+
+ // Phase 5: Deregister an already-deregistered service - should not crash
+ Client.DeregisterService(ServiceId);  // The worker no longer tracks this id, so it skips the request.
+ REQUIRE(WaitForCondition([&]() { return !Client.HasService(ServiceId); }, 10000, 50));
+
+ ConsulProc.StopConsulAgent();
+}
+
+TEST_SUITE_END();
+
+#endif
+
} // namespace zen::consul