aboutsummaryrefslogtreecommitdiff
path: root/zenserver/upstream/upstreamcache.cpp
diff options
context:
space:
mode:
authorPer Larsson <[email protected]>2022-01-24 11:11:10 +0100
committerPer Larsson <[email protected]>2022-01-24 11:11:10 +0100
commitdc6becffb513280170958f94e18c1b2966ade4d1 (patch)
treec7f9cccafcc21e241abdecde6f5219ab1009aff6 /zenserver/upstream/upstreamcache.cpp
parentFormat fix. (diff)
downloadzen-dc6becffb513280170958f94e18c1b2966ade4d1.tar.xz
zen-dc6becffb513280170958f94e18c1b2966ade4d1.zip
Refactored upstream cache to better handle different states in prep for dynamic auth tokens.
Diffstat (limited to 'zenserver/upstream/upstreamcache.cpp')
-rw-r--r--zenserver/upstream/upstreamcache.cpp690
1 files changed, 376 insertions, 314 deletions
diff --git a/zenserver/upstream/upstreamcache.cpp b/zenserver/upstream/upstreamcache.cpp
index 65624ef17..3d6641a4f 100644
--- a/zenserver/upstream/upstreamcache.cpp
+++ b/zenserver/upstream/upstreamcache.cpp
@@ -25,6 +25,7 @@
#include <algorithm>
#include <atomic>
+#include <shared_mutex>
#include <thread>
#include <unordered_map>
@@ -34,6 +35,52 @@ using namespace std::literals;
namespace detail {
+ /**
+  * Thread-safe record of an upstream endpoint's state and the reason text that goes with it.
+  *
+  * The state lives in an atomic so EndpointState() can be polled without locking. The
+  * (state, reason) pair is written and snapshotted under m_Mutex, so EndpointStatus()
+  * never pairs the state of one Set() call with the reason text of another.
+  */
+ class UpstreamStatus
+ {
+ public:
+     /** Lock-free read of the current state. */
+     UpstreamEndpointState EndpointState() const { return static_cast<UpstreamEndpointState>(m_State.load(std::memory_order_relaxed)); }
+
+     /** Returns a consistent copy of the current state together with its reason text. */
+     UpstreamEndpointStatus EndpointStatus() const
+     {
+         std::lock_guard<std::mutex> Lock(m_Mutex);
+         return {.Reason = m_ErrorText, .State = EndpointState()};
+     }
+
+     /** Sets the state and clears any previously recorded reason text. */
+     void Set(UpstreamEndpointState NewState)
+     {
+         std::lock_guard<std::mutex> Lock(m_Mutex);
+         m_State.store(static_cast<uint32_t>(NewState), std::memory_order_relaxed);
+         m_ErrorText.clear();
+     }
+
+     /** Sets the state together with the reason text that describes it. */
+     void Set(UpstreamEndpointState NewState, std::string ErrorText)
+     {
+         std::lock_guard<std::mutex> Lock(m_Mutex);
+         m_State.store(static_cast<uint32_t>(NewState), std::memory_order_relaxed);
+         m_ErrorText = std::move(ErrorText);
+     }
+
+     /**
+      * Records the outcome of a failed request: error code 401 maps to kUnauthorized,
+      * any other non-zero code maps to kError. A zero error code leaves the current
+      * state and reason untouched, i.e. a successful request never changes the status.
+      */
+     void SetFromErrorCode(int32_t ErrorCode, std::string_view ErrorText)
+     {
+         if (ErrorCode == 0)
+         {
+             return;
+         }
+
+         Set(ErrorCode == 401 ? UpstreamEndpointState::kUnauthorized : UpstreamEndpointState::kError, std::string(ErrorText));
+     }
+
+ private:
+     mutable std::mutex m_Mutex;
+     std::string m_ErrorText;
+     // Explicitly zeroed: a default-constructed std::atomic is only value-initialized since C++20,
+     // and the state is read before the first Set().
+     // NOTE(review): assumes the enumerator with value 0 is not kOk - confirm against UpstreamEndpointState.
+     std::atomic_uint32_t m_State{0};
+ };
+
class JupiterUpstreamEndpoint final : public UpstreamEndpoint
{
public:
@@ -41,7 +88,8 @@ namespace detail {
: m_Log(zen::logging::Get("upstream"))
, m_UseLegacyDdc(Options.UseLegacyDdc)
{
- m_Info.Name = "Horde"sv;
+ ZEN_ASSERT(!Options.Name.empty());
+ m_Info.Name = Options.Name;
m_Info.Url = Options.ServiceUrl;
m_Client = new CloudCacheClient(Options);
}
@@ -50,27 +98,45 @@ namespace detail {
virtual const UpstreamEndpointInfo& GetEndpointInfo() const override { return m_Info; }
- virtual UpstreamEndpointHealth Initialize() override { return CheckHealth(); }
-
- virtual bool IsHealthy() const override { return m_HealthOk.load(); }
-
- virtual UpstreamEndpointHealth CheckHealth() override
+ virtual UpstreamEndpointStatus Initialize() override  // Authenticates a session against the endpoint, records the outcome in m_Status and returns the resulting status
{
try
{
+ if (m_Status.EndpointState() == UpstreamEndpointState::kOk)  // already kOk: return without authenticating again
+ {
+ return {.State = UpstreamEndpointState::kOk};
+ }
+
CloudCacheSession Session(m_Client);
const CloudCacheResult Result = Session.Authenticate();
- m_HealthOk = Result.Success && Result.ErrorCode == 0;
+ if (Result.Success)
+ {
+ m_Status.Set(UpstreamEndpointState::kOk);
+ }
+ else if (Result.ErrorCode != 0)  // failure with an error code: 401 becomes kUnauthorized, anything else kError
+ {
+ m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason);
+ }
+ else  // failure without an error code: recorded as kUnauthorized with an empty reason
+ {
+ m_Status.Set(UpstreamEndpointState::kUnauthorized);
+ }
- return {.Reason = std::move(Result.Reason), .Ok = Result.Success};
+ return m_Status.EndpointStatus();
}
catch (std::exception& Err)
{
- return {.Reason = Err.what(), .Ok = false};
+ m_Status.Set(UpstreamEndpointState::kError, Err.what());  // any std::exception is recorded as kError with its message
+
+ return {.Reason = Err.what(), .State = GetState()};
}
}
+ virtual UpstreamEndpointState GetState() override { return m_Status.EndpointState(); }  // current state as recorded in m_Status
+
+ virtual UpstreamEndpointStatus GetStatus() override { return m_Status.EndpointStatus(); }  // current state plus its reason text, as recorded in m_Status
+
virtual GetUpstreamCacheResult GetCacheRecord(CacheKey CacheKey, ZenContentType Type) override
{
ZEN_TRACE_CPU("Upstream::Horde::GetSingleCacheRecord");
@@ -127,6 +193,8 @@ namespace detail {
}
}
+ m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason);
+
if (Result.ErrorCode == 0)
{
return {.Value = Result.Response,
@@ -136,13 +204,13 @@ namespace detail {
}
else
{
- m_HealthOk = false;
return {.Error{.ErrorCode = Result.ErrorCode, .Reason = std::move(Result.Reason)}};
}
}
catch (std::exception& Err)
{
- m_HealthOk = false;
+ m_Status.Set(UpstreamEndpointState::kError, Err.what());
+
return {.Error{.ErrorCode = -1, .Reason = Err.what()}};
}
}
@@ -170,6 +238,8 @@ namespace detail {
CloudCacheResult RefResult = Session.GetRef(CacheKey.Bucket, CacheKey.Hash, ZenContentType::kCbObject);
AppendResult(RefResult, Result);
+ m_Status.SetFromErrorCode(RefResult.ErrorCode, RefResult.Reason);
+
if (RefResult.ErrorCode == 0)
{
const CbValidateError ValidationResult = ValidateCompactBinary(RefResult.Response, CbValidateMode::All);
@@ -180,6 +250,8 @@ namespace detail {
CloudCacheResult BlobResult = Session.GetCompressedBlob(AttachmentHash.AsHash());
AppendResult(BlobResult, Result);
+ m_Status.SetFromErrorCode(BlobResult.ErrorCode, BlobResult.Reason);
+
if (BlobResult.ErrorCode == 0)
{
if (CompressedBuffer Chunk = CompressedBuffer::FromCompressed(SharedBuffer(BlobResult.Response)))
@@ -187,17 +259,9 @@ namespace detail {
Package.AddAttachment(CbAttachment(Chunk));
}
}
- else
- {
- m_HealthOk = false;
- }
});
}
}
- else
- {
- m_HealthOk = false;
- }
}
OnComplete({.Key = CacheKey, .KeyIndex = Index, .Record = Record, .Package = Package});
@@ -215,6 +279,8 @@ namespace detail {
CloudCacheSession Session(m_Client);
const CloudCacheResult Result = Session.GetCompressedBlob(PayloadId);
+ m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason);
+
if (Result.ErrorCode == 0)
{
return {.Value = Result.Response,
@@ -224,13 +290,13 @@ namespace detail {
}
else
{
- m_HealthOk = false;
return {.Error{.ErrorCode = Result.ErrorCode, .Reason = std::move(Result.Reason)}};
}
}
catch (std::exception& Err)
{
- m_HealthOk = false;
+ m_Status.Set(UpstreamEndpointState::kError, Err.what());
+
return {.Error{.ErrorCode = -1, .Reason = Err.what()}};
}
}
@@ -255,7 +321,8 @@ namespace detail {
Payload = BlobResult.Response;
AppendResult(BlobResult, Result);
- m_HealthOk = BlobResult.ErrorCode == 0;
+
+ m_Status.SetFromErrorCode(BlobResult.ErrorCode, BlobResult.Reason);
}
OnComplete({.Request = Request, .RequestIndex = Index, .Payload = Payload});
@@ -292,7 +359,7 @@ namespace detail {
}
}
- m_HealthOk = Result.ErrorCode == 0;
+ m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason);
return {.Reason = std::move(Result.Reason),
.Bytes = Result.Bytes,
@@ -323,7 +390,7 @@ namespace detail {
BlobResult = Session.PutCompressedBlob(CacheRecord.PayloadIds[Idx], Payloads[Idx]);
}
- m_HealthOk = BlobResult.ErrorCode == 0;
+ m_Status.SetFromErrorCode(BlobResult.ErrorCode, BlobResult.Reason);
if (!BlobResult.Success)
{
@@ -344,7 +411,7 @@ namespace detail {
RefResult = Session.PutRef(CacheRecord.Key.Bucket, CacheRecord.Key.Hash, RecordValue, ZenContentType::kCbObject);
}
- m_HealthOk = RefResult.ErrorCode == 0;
+ m_Status.SetFromErrorCode(RefResult.ErrorCode, RefResult.Reason);
if (!RefResult.Success)
{
@@ -366,7 +433,8 @@ namespace detail {
const IoHash RefHash = IoHash::HashBuffer(RecordValue);
FinalizeRefResult FinalizeResult = Session.FinalizeRef(CacheRecord.Key.Bucket, CacheRecord.Key.Hash, RefHash);
- m_HealthOk = FinalizeResult.ErrorCode == 0;
+
+ m_Status.SetFromErrorCode(FinalizeResult.ErrorCode, FinalizeResult.Reason);
if (!FinalizeResult.Success)
{
@@ -385,7 +453,8 @@ namespace detail {
}
FinalizeResult = Session.FinalizeRef(CacheRecord.Key.Bucket, CacheRecord.Key.Hash, RefHash);
- m_HealthOk = FinalizeResult.ErrorCode == 0;
+
+ m_Status.SetFromErrorCode(FinalizeResult.ErrorCode, FinalizeResult.Reason);
if (!FinalizeResult.Success)
{
@@ -420,7 +489,8 @@ namespace detail {
}
catch (std::exception& Err)
{
- m_HealthOk = false;
+ m_Status.Set(UpstreamEndpointState::kError, Err.what());
+
return {.Reason = std::string(Err.what()), .Success = false};
}
}
@@ -444,11 +514,10 @@ namespace detail {
spdlog::logger& m_Log;
UpstreamEndpointInfo m_Info;
+ UpstreamStatus m_Status;
+ UpstreamEndpointStats m_Stats;
bool m_UseLegacyDdc;
- std::string m_DisplayName;
RefPtr<CloudCacheClient> m_Client;
- UpstreamEndpointStats m_Stats;
- std::atomic_bool m_HealthOk{false};
};
class ZenUpstreamEndpoint final : public UpstreamEndpoint
@@ -466,7 +535,7 @@ namespace detail {
public:
ZenUpstreamEndpoint(const ZenStructuredCacheClientOptions& Options)
: m_Log(zen::logging::Get("upstream"))
- , m_Info({.Name = std::string("Zen")})
+ , m_Info({.Name = std::string(Options.Name)})
, m_ConnectTimeout(Options.ConnectTimeout)
, m_Timeout(Options.Timeout)
{
@@ -480,62 +549,43 @@ namespace detail {
virtual const UpstreamEndpointInfo& GetEndpointInfo() const override { return m_Info; }
- virtual UpstreamEndpointHealth Initialize() override
- {
- const ZenEndpoint& Ep = GetEndpoint();
- if (Ep.Ok)
- {
- m_Info.Url = Ep.Url;
- m_Client = new ZenStructuredCacheClient({.Url = m_Info.Url, .ConnectTimeout = m_ConnectTimeout, .Timeout = m_Timeout});
-
- m_HealthOk = true;
- return {.Ok = true};
- }
-
- m_HealthOk = false;
- return {.Reason = Ep.Reason};
- }
-
- virtual bool IsHealthy() const override { return m_HealthOk; }
-
- virtual UpstreamEndpointHealth CheckHealth() override
+ virtual UpstreamEndpointStatus Initialize() override  // Picks an endpoint via GetEndpoint(), creates the client for it when it reports Ok, records the outcome in m_Status and returns the resulting status
{
try
{
- if (m_Client.IsNull())
+ if (m_Status.EndpointState() == UpstreamEndpointState::kOk)  // already kOk: return without selecting an endpoint again
{
- const ZenEndpoint& Ep = GetEndpoint();
- if (Ep.Ok)
- {
- m_Info.Url = Ep.Url;
- m_Client =
- new ZenStructuredCacheClient({.Url = m_Info.Url, .ConnectTimeout = m_ConnectTimeout, .Timeout = m_Timeout});
-
- m_HealthOk = true;
- return {.Ok = true};
- }
-
- return {.Reason = Ep.Reason};
+ return {.State = UpstreamEndpointState::kOk};
}
- ZenStructuredCacheSession Session(*m_Client);
- ZenCacheResult Result;
+ const ZenEndpoint& Ep = GetEndpoint();
- for (int32_t Attempt = 0, MaxAttempts = 2; Attempt < MaxAttempts && !Result.Success; ++Attempt)
+ m_Info.Url = Ep.Url;  // NOTE(review): assigned even when Ep.Ok is false; the removed code only set the URL on success - confirm this is intended
+
+ if (Ep.Ok)
{
- Result = Session.CheckHealth();
+ m_Client = new ZenStructuredCacheClient({.Url = m_Info.Url, .ConnectTimeout = m_ConnectTimeout, .Timeout = m_Timeout});
+ m_Status.Set(UpstreamEndpointState::kOk);
+ }
+ else
+ {
+ m_Status.Set(UpstreamEndpointState::kError, Ep.Reason);  // endpoint not Ok: recorded as kError with the endpoint's reason
}
- m_HealthOk = Result.ErrorCode == 0;
-
- return {.Reason = std::move(Result.Reason), .Ok = m_HealthOk};
+ return m_Status.EndpointStatus();
}
catch (std::exception& Err)
{
- return {.Reason = Err.what(), .Ok = false};
+ m_Status.Set(UpstreamEndpointState::kError, Err.what());  // any std::exception is recorded as kError with its message
+
+ return {.Reason = Err.what(), .State = GetState()};
}
}
+ virtual UpstreamEndpointState GetState() override { return m_Status.EndpointState(); }  // current state as recorded in m_Status
+
+ virtual UpstreamEndpointStatus GetStatus() override { return m_Status.EndpointStatus(); }  // current state plus its reason text, as recorded in m_Status
+
virtual GetUpstreamCacheResult GetCacheRecord(CacheKey CacheKey, ZenContentType Type) override
{
ZEN_TRACE_CPU("Upstream::Zen::GetSingleCacheRecord");
@@ -545,6 +595,8 @@ namespace detail {
ZenStructuredCacheSession Session(*m_Client);
const ZenCacheResult Result = Session.GetCacheRecord(CacheKey.Bucket, CacheKey.Hash, Type);
+ m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason);
+
if (Result.ErrorCode == 0)
{
return {.Value = Result.Response,
@@ -554,13 +606,13 @@ namespace detail {
}
else
{
- m_HealthOk = false;
return {.Error{.ErrorCode = Result.ErrorCode, .Reason = std::move(Result.Reason)}};
}
}
catch (std::exception& Err)
{
- m_HealthOk = false;
+ m_Status.Set(UpstreamEndpointState::kError, Err.what());
+
return {.Error{.ErrorCode = -1, .Reason = Err.what()}};
}
}
@@ -608,6 +660,8 @@ namespace detail {
Result = Session.InvokeRpc(BatchRequest.Save());
}
+ m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason);
+
if (Result.Success)
{
if (BatchResponse.TryLoad(Result.Response))
@@ -621,10 +675,6 @@ namespace detail {
return {.Bytes = Result.Bytes, .ElapsedSeconds = Result.ElapsedSeconds, .Success = true};
}
}
- else if (Result.ErrorCode)
- {
- m_HealthOk = false;
- }
for (size_t Index : KeyIndex)
{
@@ -643,6 +693,8 @@ namespace detail {
ZenStructuredCacheSession Session(*m_Client);
const ZenCacheResult Result = Session.GetCachePayload(CacheKey.Bucket, CacheKey.Hash, PayloadId);
+ m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason);
+
if (Result.ErrorCode == 0)
{
return {.Value = Result.Response,
@@ -652,13 +704,13 @@ namespace detail {
}
else
{
- m_HealthOk = false;
return {.Error{.ErrorCode = Result.ErrorCode, .Reason = std::move(Result.Reason)}};
}
}
catch (std::exception& Err)
{
- m_HealthOk = false;
+ m_Status.Set(UpstreamEndpointState::kError, Err.what());
+
return {.Error{.ErrorCode = -1, .Reason = Err.what()}};
}
}
@@ -713,6 +765,8 @@ namespace detail {
Result = Session.InvokeRpc(BatchRequest.Save());
}
+ m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason);
+
if (Result.Success)
{
if (BatchResponse.TryLoad(Result.Response))
@@ -736,10 +790,6 @@ namespace detail {
return {.Bytes = Result.Bytes, .ElapsedSeconds = Result.ElapsedSeconds, .Success = true};
}
}
- else if (Result.ErrorCode)
- {
- m_HealthOk = false;
- }
for (size_t Index : RequestIndex)
{
@@ -789,10 +839,10 @@ namespace detail {
for (uint32_t Attempt = 0; Attempt < MaxAttempts && !Result.Success; Attempt++)
{
Result = Session.PutCacheRecord(CacheRecord.Key.Bucket, CacheRecord.Key.Hash, PackagePayload, CacheRecord.Type);
-
- m_HealthOk = Result.ErrorCode == 0;
}
+ m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason);
+
TotalBytes = Result.Bytes;
TotalElapsedSeconds = Result.ElapsedSeconds;
}
@@ -807,10 +857,10 @@ namespace detail {
CacheRecord.Key.Hash,
CacheRecord.PayloadIds[Idx],
Payloads[Idx]);
-
- m_HealthOk = Result.ErrorCode == 0;
}
+ m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason);
+
TotalBytes += Result.Bytes;
TotalElapsedSeconds += Result.ElapsedSeconds;
@@ -827,10 +877,10 @@ namespace detail {
for (uint32_t Attempt = 0; Attempt < MaxAttempts && !Result.Success; Attempt++)
{
Result = Session.PutCacheRecord(CacheRecord.Key.Bucket, CacheRecord.Key.Hash, RecordValue, CacheRecord.Type);
-
- m_HealthOk = Result.ErrorCode == 0;
}
+ m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason);
+
TotalBytes += Result.Bytes;
TotalElapsedSeconds += Result.ElapsedSeconds;
}
@@ -842,7 +892,8 @@ namespace detail {
}
catch (std::exception& Err)
{
- m_HealthOk = false;
+ m_Status.Set(UpstreamEndpointState::kError, Err.what());
+
return {.Reason = std::string(Err.what()), .Success = false};
}
}
@@ -885,109 +936,18 @@ namespace detail {
spdlog::logger& m_Log;
UpstreamEndpointInfo m_Info;
+ UpstreamStatus m_Status;
+ UpstreamEndpointStats m_Stats;
std::vector<ZenEndpoint> m_Endpoints;
std::chrono::milliseconds m_ConnectTimeout;
std::chrono::milliseconds m_Timeout;
RefPtr<ZenStructuredCacheClient> m_Client;
- UpstreamEndpointStats m_Stats;
- std::atomic_bool m_HealthOk{false};
};
} // namespace detail
//////////////////////////////////////////////////////////////////////////
-struct UpstreamStats
-{
- static constexpr uint64_t MaxSampleCount = 1000ull;
-
- UpstreamStats(bool Enabled) : m_Enabled(Enabled) {}
-
- void Add(spdlog::logger& Logger,
- UpstreamEndpoint& Endpoint,
- const GetUpstreamCacheResult& Result,
- const std::vector<std::unique_ptr<UpstreamEndpoint>>& Endpoints)
- {
- UpstreamEndpointStats& Stats = Endpoint.Stats();
-
- if (Result.Error)
- {
- Stats.ErrorCount++;
- }
- else if (Result.Success)
- {
- Stats.HitCount++;
- Stats.DownBytes.fetch_add(Result.Bytes);
- Stats.TimeDownMs.fetch_add(uint64_t(Result.ElapsedSeconds * 1000.0));
- }
- else
- {
- Stats.MissCount++;
- }
-
- if (m_Enabled && m_SampleCount++ % MaxSampleCount)
- {
- Dump(Logger, Endpoints);
- }
- }
-
- void Add(spdlog::logger& Logger,
- UpstreamEndpoint& Endpoint,
- const PutUpstreamCacheResult& Result,
- const std::vector<std::unique_ptr<UpstreamEndpoint>>& Endpoints)
- {
- UpstreamEndpointStats& Stats = Endpoint.Stats();
- if (Result.Success)
- {
- Stats.UpCount++;
- Stats.UpBytes.fetch_add(Result.Bytes);
- Stats.TimeUpMs.fetch_add(uint64_t(Result.ElapsedSeconds * 1000.0));
- }
- else
- {
- Stats.ErrorCount++;
- }
-
- if (m_Enabled && m_SampleCount++ % MaxSampleCount)
- {
- Dump(Logger, Endpoints);
- }
- }
-
- void Dump(spdlog::logger& Logger, const std::vector<std::unique_ptr<UpstreamEndpoint>>& Endpoints)
- {
- for (auto& Ep : Endpoints)
- {
- // These stats will not be totally correct as the numbers are not captured atomically
- UpstreamEndpointStats& Stats = Ep->Stats();
- const uint64_t HitCount = Stats.HitCount;
- const uint64_t MissCount = Stats.MissCount;
- const double DownMBytes = double(Stats.DownBytes) / 1024.0 / 1024.0;
- const double SecondsDown = double(Stats.TimeDownMs) / 1000.0;
- const double UpMBytes = double(Stats.UpBytes) / 1024.0 / 1024.0;
- const double SecondsUp = double(Stats.TimeUpMs) / 1000.0;
-
- const double UpSpeed = UpMBytes > 0 ? UpMBytes / SecondsUp : 0.0;
- const double DownSpeed = DownMBytes > 0 ? DownMBytes / SecondsDown : 0.0;
- const uint64_t TotalCount = HitCount + MissCount;
- const double HitRate = TotalCount > 0 ? (double(HitCount) / double(TotalCount)) : 0.0;
-
- Logger.debug("STATS - '{}', Hit rate: {:.2f}%, DOWN: '{:.2f} MiB {:.2f} MiB/s', UP: '{:.2f} MiB {:.2f} MiB/s'",
- Ep->GetEndpointInfo().Name,
- HitRate,
- DownMBytes,
- DownSpeed,
- UpMBytes,
- UpSpeed);
- }
- }
-
- bool m_Enabled;
- std::atomic_uint64_t m_SampleCount = {};
-};
-
-//////////////////////////////////////////////////////////////////////////
-
class DefaultUpstreamCache final : public UpstreamCache
{
public:
@@ -996,71 +956,87 @@ public:
, m_Options(Options)
, m_CacheStore(CacheStore)
, m_CidStore(CidStore)
- , m_Stats(Options.StatsEnabled)
{
}
virtual ~DefaultUpstreamCache() { Shutdown(); }
- virtual bool Initialize() override
+ // Starts m_Options.ThreadCount upstream worker threads and the endpoint monitor thread.
+ // The running flag is raised before any thread is created, as the removed implementation did,
+ // so a thread that consults m_RunState.IsRunning as soon as it starts cannot observe 'false'.
+ // NOTE(review): the thread bodies are outside this hunk - confirm they read the flag on start-up.
+ virtual void Initialize() override
{
+ m_RunState.IsRunning = true;
+
- for (auto& Endpoint : m_Endpoints)
+ for (uint32_t Idx = 0; Idx < m_Options.ThreadCount; Idx++)
{
- const UpstreamEndpointHealth Health = Endpoint->Initialize();
- const UpstreamEndpointInfo& Info = Endpoint->GetEndpointInfo();
-
- if (Health.Ok)
- {
- ZEN_INFO("'{}' endpoint '{}' OK", Info.Name, Info.Url);
- }
- else
- {
- ZEN_WARN("'{}' endpoint '{}' FAILED, reason '{}'", Info.Name, Info.Url, Health.Reason);
- }
+ m_UpstreamThreads.emplace_back(&DefaultUpstreamCache::ProcessUpstreamQueue, this);
}
- m_RunState.IsRunning = !m_Endpoints.empty();
+ m_EndpointMonitorThread = std::thread(&DefaultUpstreamCache::MonitorEndpoints, this);
+ }
+
+ // Initializes the endpoint, logs the resulting state and appends it to m_Endpoints
+ // while holding m_EndpointsMutex exclusively.
+ virtual void RegisterEndpoint(std::unique_ptr<UpstreamEndpoint> Endpoint) override
+ {
+     const UpstreamEndpointState InitialState = Endpoint->Initialize().State;
+     const UpstreamEndpointInfo& EpInfo = Endpoint->GetEndpointInfo();
+
+     ZEN_INFO("register endpoint '{} - {}' {}", EpInfo.Name, EpInfo.Url, ToString(InitialState));
+
+     // An endpoint that failed to initialize is kept as well: the health check
+     // loop re-initializes endpoints that are in the error/unauthorized state.
+     std::unique_lock<std::shared_mutex> ExclusiveLock(m_EndpointsMutex);
+     m_Endpoints.push_back(std::move(Endpoint));
+ }
+
+ virtual void IterateEndpoints(std::function<bool(UpstreamEndpoint&)>&& Fn) override  // Calls Fn for each entry of m_Endpoints in order; stops as soon as Fn returns false
+ {
+ std::shared_lock<std::shared_mutex> _(m_EndpointsMutex);  // held for the whole iteration, so Fn runs under the shared lock
+
+ for (auto& Ep : m_Endpoints)
{
- for (uint32_t Idx = 0; Idx < m_Options.ThreadCount; Idx++)
+ if (!Fn(*Ep))
{
- m_UpstreamThreads.emplace_back(&DefaultUpstreamCache::ProcessUpstreamQueue, this);
+ break;
}
-
- m_EndpointMonitorThread = std::thread(&DefaultUpstreamCache::MonitorEndpoints, this);
}
-
- return m_RunState.IsRunning;
}
- virtual void RegisterEndpoint(std::unique_ptr<UpstreamEndpoint> Endpoint) override { m_Endpoints.emplace_back(std::move(Endpoint)); }
-
virtual GetUpstreamCacheResult GetCacheRecord(CacheKey CacheKey, ZenContentType Type) override
{
ZEN_TRACE_CPU("Upstream::GetCacheRecord");
+ std::shared_lock<std::shared_mutex> _(m_EndpointsMutex);
+
if (m_Options.ReadUpstream)
{
for (auto& Endpoint : m_Endpoints)
{
- if (Endpoint->IsHealthy())
+ if (Endpoint->GetState() != UpstreamEndpointState::kOk)
{
- const GetUpstreamCacheResult Result = Endpoint->GetCacheRecord(CacheKey, Type);
- m_Stats.Add(m_Log, *Endpoint, Result, m_Endpoints);
+ continue;
+ }
- if (Result.Success)
- {
- return Result;
- }
+ UpstreamEndpointStats& Stats = Endpoint->Stats();
+ GetUpstreamCacheResult Result;
+ {
+ metrics::OperationTiming::Scope Scope(Stats.CacheGetRequestTiming);
+ Result = Endpoint->GetCacheRecord(CacheKey, Type);
+ }
- if (Result.Error)
- {
- ZEN_ERROR("get cache record FAILED, endpoint '{}', reason '{}', error code '{}'",
- Endpoint->GetEndpointInfo().Url,
- Result.Error.Reason,
- Result.Error.ErrorCode);
- }
+ Stats.CacheGetCount.Increment(1);
+ Stats.CacheGetTotalBytes.Increment(Result.Bytes);
+
+ if (Result.Success)
+ {
+ Stats.CacheHitCount.Increment(1);
+
+ return Result;
+ }
+
+ if (Result.Error)
+ {
+ Stats.CacheErrorCount.Increment(1);
+
+ ZEN_ERROR("get cache record FAILED, endpoint '{}', reason '{}', error code '{}'",
+ Endpoint->GetEndpointInfo().Url,
+ Result.Error.Reason,
+ Result.Error.ErrorCode);
}
}
}
@@ -1075,42 +1051,62 @@ public:
{
ZEN_TRACE_CPU("Upstream::GetCacheRecords");
- std::vector<size_t> MissingKeys(KeyIndex.begin(), KeyIndex.end());
+ std::shared_lock<std::shared_mutex> _(m_EndpointsMutex);
+
+ std::vector<size_t> RemainingKeys(KeyIndex.begin(), KeyIndex.end());
if (m_Options.ReadUpstream)
{
for (auto& Endpoint : m_Endpoints)
{
- if (Endpoint->IsHealthy() && !MissingKeys.empty())
+ if (RemainingKeys.empty())
+ {
+ break;
+ }
+
+ if (Endpoint->GetState() != UpstreamEndpointState::kOk)
+ {
+ continue;
+ }
+
+ UpstreamEndpointStats& Stats = Endpoint->Stats();
+ std::vector<size_t> Missing;
+ GetUpstreamCacheResult Result;
{
- std::vector<size_t> Missing;
+ metrics::OperationTiming::Scope Scope(Stats.CacheGetRequestTiming);
- auto Result = Endpoint->GetCacheRecords(CacheKeys, MissingKeys, Policy, [&](CacheRecordGetCompleteParams&& Params) {
+ Result = Endpoint->GetCacheRecords(CacheKeys, RemainingKeys, Policy, [&](CacheRecordGetCompleteParams&& Params) {
if (Params.Record)
{
OnComplete(std::forward<CacheRecordGetCompleteParams>(Params));
+
+ Stats.CacheHitCount.Increment(1);
}
else
{
Missing.push_back(Params.KeyIndex);
}
});
+ }
- if (Result.Error)
- {
- ZEN_ERROR("get cache record(s) (rpc) FAILED, endpoint '{}', reason '{}', error code '{}'",
- Endpoint->GetEndpointInfo().Url,
- Result.Error.Reason,
- Result.Error.ErrorCode);
- }
+ Stats.CacheGetCount.Increment(int64_t(RemainingKeys.size()));
+ Stats.CacheGetTotalBytes.Increment(Result.Bytes);
- m_Stats.Add(m_Log, *Endpoint, Result, m_Endpoints);
- MissingKeys = std::move(Missing);
+ if (Result.Error)
+ {
+ Stats.CacheErrorCount.Increment(1);
+
+ ZEN_ERROR("get cache record(s) (rpc) FAILED, endpoint '{}', reason '{}', error code '{}'",
+ Endpoint->GetEndpointInfo().Url,
+ Result.Error.Reason,
+ Result.Error.ErrorCode);
}
+
+ RemainingKeys = std::move(Missing);
}
}
- for (size_t Index : MissingKeys)
+ for (size_t Index : RemainingKeys)
{
OnComplete({.Key = CacheKeys[Index], .KeyIndex = Index, .Record = CbObjectView(), .Package = CbPackage()});
}
@@ -1122,43 +1118,62 @@ public:
{
ZEN_TRACE_CPU("Upstream::GetCachePayloads");
- std::vector<size_t> MissingPayloads(RequestIndex.begin(), RequestIndex.end());
+ std::shared_lock<std::shared_mutex> _(m_EndpointsMutex);
+
+ std::vector<size_t> RemainingKeys(RequestIndex.begin(), RequestIndex.end());
if (m_Options.ReadUpstream)
{
for (auto& Endpoint : m_Endpoints)
{
- if (Endpoint->IsHealthy() && !MissingPayloads.empty())
+ if (RemainingKeys.empty())
{
- std::vector<size_t> Missing;
+ break;
+ }
- auto Result =
- Endpoint->GetCachePayloads(CacheChunkRequests, MissingPayloads, [&](CachePayloadGetCompleteParams&& Params) {
- if (Params.Payload)
- {
- OnComplete(std::forward<CachePayloadGetCompleteParams>(Params));
- }
- else
- {
- Missing.push_back(Params.RequestIndex);
- }
- });
+ if (Endpoint->GetState() != UpstreamEndpointState::kOk)
+ {
+ continue;
+ }
- if (Result.Error)
- {
- ZEN_ERROR("get cache payloads(s) (rpc) FAILED, endpoint '{}', reason '{}', error code '{}'",
- Endpoint->GetEndpointInfo().Url,
- Result.Error.Reason,
- Result.Error.ErrorCode);
- }
+ UpstreamEndpointStats& Stats = Endpoint->Stats();
+ std::vector<size_t> Missing;
+ GetUpstreamCacheResult Result;
+ {
+ metrics::OperationTiming::Scope Scope(Endpoint->Stats().CacheGetRequestTiming);
+
+ Result = Endpoint->GetCachePayloads(CacheChunkRequests, RemainingKeys, [&](CachePayloadGetCompleteParams&& Params) {
+ if (Params.Payload)
+ {
+ OnComplete(std::forward<CachePayloadGetCompleteParams>(Params));
+
+ Stats.CacheHitCount.Increment(1);
+ }
+ else
+ {
+ Missing.push_back(Params.RequestIndex);
+ }
+ });
+ }
- m_Stats.Add(m_Log, *Endpoint, Result, m_Endpoints);
- MissingPayloads = std::move(Missing);
+ Stats.CacheGetCount.Increment(int64_t(RemainingKeys.size()));
+ Stats.CacheGetTotalBytes.Increment(Result.Bytes);
+
+ if (Result.Error)
+ {
+ Stats.CacheErrorCount.Increment(1);
+
+ ZEN_ERROR("get cache payloads(s) (rpc) FAILED, endpoint '{}', reason '{}', error code '{}'",
+ Endpoint->GetEndpointInfo().Url,
+ Result.Error.Reason,
+ Result.Error.ErrorCode);
}
+
+ RemainingKeys = std::move(Missing);
}
}
- for (size_t Index : MissingPayloads)
+ for (size_t Index : RemainingKeys)
{
OnComplete({.Request = CacheChunkRequests[Index], .RequestIndex = Index, .Payload = IoBuffer()});
}
@@ -1172,23 +1187,37 @@ public:
{
for (auto& Endpoint : m_Endpoints)
{
- if (Endpoint->IsHealthy())
+ if (Endpoint->GetState() != UpstreamEndpointState::kOk)
{
- const GetUpstreamCacheResult Result = Endpoint->GetCachePayload(CacheKey, PayloadId);
- m_Stats.Add(m_Log, *Endpoint, Result, m_Endpoints);
+ continue;
+ }
- if (Result.Success)
- {
- return Result;
- }
+ UpstreamEndpointStats& Stats = Endpoint->Stats();
+ GetUpstreamCacheResult Result;
- if (Result.Error)
- {
- ZEN_ERROR("get cache payload FAILED, endpoint '{}', reason '{}', error code '{}'",
- Endpoint->GetEndpointInfo().Url,
- Result.Error.Reason,
- Result.Error.ErrorCode);
- }
+ {
+ metrics::OperationTiming::Scope Scope(Stats.CacheGetRequestTiming);
+ Result = Endpoint->GetCachePayload(CacheKey, PayloadId);
+ }
+
+ Stats.CacheGetCount.Increment(1);
+ Stats.CacheGetTotalBytes.Increment(Result.Bytes);
+
+ if (Result.Success)
+ {
+ Stats.CacheHitCount.Increment(1);
+
+ return Result;
+ }
+
+ if (Result.Error)
+ {
+ Stats.CacheErrorCount.Increment(1);
+
+ ZEN_ERROR("get cache payload FAILED, endpoint '{}', reason '{}', error code '{}'",
+ Endpoint->GetEndpointInfo().Url,
+ Result.Error.Reason,
+ Result.Error.ErrorCode);
}
}
}
@@ -1196,7 +1225,7 @@ public:
return {};
}
- virtual EnqueueResult EnqueueUpstream(UpstreamCacheRecord CacheRecord) override
+ virtual void EnqueueUpstream(UpstreamCacheRecord CacheRecord) override
{
if (m_RunState.IsRunning && m_Options.WriteUpstream)
{
@@ -1208,11 +1237,7 @@ public:
{
ProcessCacheRecord(std::move(CacheRecord));
}
-
- return {.Success = true};
}
-
- return {};
}
virtual void GetStatus(CbObjectWriter& Status) override
@@ -1225,22 +1250,35 @@ public:
Status.BeginArray("endpoints");
for (const auto& Ep : m_Endpoints)
{
- const UpstreamEndpointInfo& Info = Ep->GetEndpointInfo();
- Status.BeginObject();
- Status << "name" << Info.Name;
- Status << "url" << Info.Url;
- Status << "health" << (Ep->IsHealthy() ? "ok"sv : "inactive"sv);
+ const UpstreamEndpointInfo& EpInfo = Ep->GetEndpointInfo();
+ const UpstreamEndpointStatus EpStatus = Ep->GetStatus();
+ UpstreamEndpointStats& EpStats = Ep->Stats();
- UpstreamEndpointStats& Stats = Ep->Stats();
- const uint64_t HitCount = Stats.HitCount;
- const uint64_t MissCount = Stats.MissCount;
- const uint64_t TotalCount = HitCount + MissCount;
- const double HitRate = TotalCount > 0 ? (double(HitCount) / double(TotalCount)) : 0.0;
+ Status.BeginObject();
+ Status << "name" << EpInfo.Name;
+ Status << "url" << EpInfo.Url;
+ Status << "state" << ToString(EpStatus.State);
+ Status << "reason" << EpStatus.Reason;
- Status << "hit_ratio" << HitRate;
- Status << "downloaded_mb" << (double(Stats.DownBytes) / 1024.0 / 1024.0);
- Status << "uploaded_mb" << Stats.UpBytes;
- Status << "error_count" << Stats.ErrorCount;
+ Status.BeginObject("cache"sv);
+ {
+ const int64_t GetCount = EpStats.CacheGetCount.Value();
+ const int64_t HitCount = EpStats.CacheHitCount.Value();
+ const int64_t ErrorCount = EpStats.CacheErrorCount.Value();
+ const double HitRatio = GetCount > 0 ? double(HitCount) / double(GetCount) : 0.0;
+ const double ErrorRatio = GetCount > 0 ? double(ErrorCount) / double(GetCount) : 0.0;
+
+ metrics::EmitSnapshot("get_requests"sv, EpStats.CacheGetRequestTiming, Status);
+ Status << "get_bytes" << EpStats.CacheGetTotalBytes.Value();
+ Status << "get_count" << GetCount;
+ Status << "hit_count" << HitCount;
+ Status << "hit_ratio" << HitRatio;
+ Status << "error_count" << ErrorCount;
+ Status << "error_ratio" << ErrorRatio;
+ metrics::EmitSnapshot("put_requests"sv, EpStats.CachePutRequestTiming, Status);
+ Status << "put_bytes" << EpStats.CachePutTotalBytes.Value();
+ }
+ Status.EndObject();
Status.EndObject();
}
@@ -1277,21 +1315,31 @@ private:
}
}
+ std::shared_lock<std::shared_mutex> _(m_EndpointsMutex);
+
for (auto& Endpoint : m_Endpoints)
{
- if (Endpoint->IsHealthy())
+ if (Endpoint->GetState() != UpstreamEndpointState::kOk)
{
- const PutUpstreamCacheResult Result = Endpoint->PutCacheRecord(CacheRecord, CacheValue.Value, std::span(Payloads));
- m_Stats.Add(m_Log, *Endpoint, Result, m_Endpoints);
+ continue;
+ }
- if (!Result.Success)
- {
- ZEN_WARN("upload cache record '{}/{}' FAILED, endpoint '{}', reason '{}'",
- CacheRecord.Key.Bucket,
- CacheRecord.Key.Hash,
- Endpoint->GetEndpointInfo().Url,
- Result.Reason);
- }
+ UpstreamEndpointStats& Stats = Endpoint->Stats();
+ PutUpstreamCacheResult Result;
+ {
+ metrics::OperationTiming::Scope Scope(Stats.CachePutRequestTiming);
+ Result = Endpoint->PutCacheRecord(CacheRecord, CacheValue.Value, std::span(Payloads));
+ }
+
+ Stats.CachePutTotalBytes.Increment(Result.Bytes);
+
+ if (!Result.Success)
+ {
+ ZEN_WARN("upload cache record '{}/{}' FAILED, endpoint '{}', reason '{}'",
+ CacheRecord.Key.Bucket,
+ CacheRecord.Key.Hash,
+ Endpoint->GetEndpointInfo().Url,
+ Result.Reason);
}
}
}
@@ -1334,21 +1382,35 @@ private:
try
{
- for (auto& Endpoint : m_Endpoints)
+ std::vector<UpstreamEndpoint*> Endpoints;
+
{
- if (!Endpoint->IsHealthy())
+ std::shared_lock<std::shared_mutex> _(m_EndpointsMutex);
+
+ for (auto& Endpoint : m_Endpoints)
{
- const UpstreamEndpointInfo& Info = Endpoint->GetEndpointInfo();
- if (const UpstreamEndpointHealth Health = Endpoint->CheckHealth(); Health.Ok)
+ if (Endpoint->GetState() == UpstreamEndpointState::kError ||
+ Endpoint->GetState() == UpstreamEndpointState::kUnauthorized)
{
- ZEN_INFO("health check endpoint '{} - {}' OK", Info.Name, Info.Url, Health.Reason);
- }
- else
- {
- ZEN_WARN("health check endpoint '{} - {}' FAILED, reason '{}'", Info.Name, Info.Url, Health.Reason);
+ Endpoints.push_back(Endpoint.get());
}
}
}
+
+ for (auto& Endpoint : Endpoints)
+ {
+ const UpstreamEndpointInfo& Info = Endpoint->GetEndpointInfo();
+ const UpstreamEndpointStatus Status = Endpoint->Initialize();
+
+ if (Status.State == UpstreamEndpointState::kOk)
+ {
+ ZEN_INFO("health check endpoint '{} - {}' OK", Info.Name, Info.Url);
+ }
+ else
+ {
+ ZEN_WARN("health check endpoint '{} - {}' FAILED, reason '{}'", Info.Name, Info.Url, Status.Reason);
+ }
+ }
}
catch (std::exception& Err)
{
@@ -1403,7 +1465,7 @@ private:
ZenCacheStore& m_CacheStore;
CidStore& m_CidStore;
UpstreamQueue m_UpstreamQueue;
- UpstreamStats m_Stats;
+ std::shared_mutex m_EndpointsMutex;
std::vector<std::unique_ptr<UpstreamEndpoint>> m_Endpoints;
std::vector<std::thread> m_UpstreamThreads;
std::thread m_EndpointMonitorThread;