// Copyright Epic Games, Inc. All Rights Reserved. #include "upstreamcache.h" #include "jupiter.h" #include "zen.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include "cache/structuredcachestore.h" #include "diag/logging.h" #include #include #include #include #include #include namespace zen { using namespace std::literals; namespace detail { class UpstreamStatus { public: UpstreamEndpointState EndpointState() const { return static_cast(m_State.load(std::memory_order_relaxed)); } UpstreamEndpointStatus EndpointStatus() const { const UpstreamEndpointState State = EndpointState(); { std::unique_lock _(m_Mutex); return {.Reason = m_ErrorText, .State = State}; } } void Set(UpstreamEndpointState NewState) { m_State.store(static_cast(NewState), std::memory_order_relaxed); { std::unique_lock _(m_Mutex); m_ErrorText.clear(); } } void Set(UpstreamEndpointState NewState, std::string ErrorText) { m_State.store(static_cast(NewState), std::memory_order_relaxed); { std::unique_lock _(m_Mutex); m_ErrorText = std::move(ErrorText); } } void SetFromErrorCode(int32_t ErrorCode, std::string_view ErrorText) { if (ErrorCode != 0) { Set(ErrorCode == 401 ? UpstreamEndpointState::kUnauthorized : UpstreamEndpointState::kError, std::string(ErrorText)); } } private: mutable std::mutex m_Mutex; std::string m_ErrorText; std::atomic_uint32_t m_State; }; class JupiterUpstreamEndpoint final : public UpstreamEndpoint { public: JupiterUpstreamEndpoint(const CloudCacheClientOptions& Options, AuthMgr& Mgr) : m_AuthMgr(Mgr) , m_Log(zen::logging::Get("upstream")) , m_UseLegacyDdc(Options.UseLegacyDdc) { ZEN_ASSERT(!Options.Name.empty()); m_Info.Name = Options.Name; m_Info.Url = Options.ServiceUrl; m_Client = new CloudCacheClient(Options); } virtual ~JupiterUpstreamEndpoint() = default; virtual const UpstreamEndpointInfo& GetEndpointInfo() const override { return m_Info; } virtual UpstreamEndpointStatus Initialize() override { try { if (m_Status.EndpointState() == UpstreamEndpointState::kOk) { return {.State = UpstreamEndpointState::kOk}; } CloudCacheSession Session(m_Client); const CloudCacheResult Result = Session.Authenticate(); if (Result.Success) { m_Status.Set(UpstreamEndpointState::kOk); } else if (Result.ErrorCode != 0) { m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason); } else { m_Status.Set(UpstreamEndpointState::kUnauthorized); } return m_Status.EndpointStatus(); } catch (std::exception& Err) { m_Status.Set(UpstreamEndpointState::kError, Err.what()); return {.Reason = Err.what(), .State = GetState()}; } } virtual UpstreamEndpointState GetState() override { return m_Status.EndpointState(); } virtual UpstreamEndpointStatus GetStatus() override { return m_Status.EndpointStatus(); } virtual GetUpstreamCacheResult GetCacheRecord(CacheKey CacheKey, ZenContentType Type) override { ZEN_TRACE_CPU("Upstream::Horde::GetSingleCacheRecord"); try { CloudCacheSession Session(m_Client); CloudCacheResult Result; if (m_UseLegacyDdc && Type == ZenContentType::kBinary) { Result = Session.GetDerivedData(CacheKey.Bucket, CacheKey.Hash); } else { const ZenContentType AcceptType = Type == ZenContentType::kCbPackage ? ZenContentType::kCbObject : Type; Result = Session.GetRef(CacheKey.Bucket, CacheKey.Hash, AcceptType); if (Result.Success && Type == ZenContentType::kCbPackage) { CbPackage Package; const CbValidateError ValidationResult = ValidateCompactBinary(Result.Response, CbValidateMode::All); if (Result.Success = ValidationResult == CbValidateError::None; Result.Success) { CbObject CacheRecord = LoadCompactBinaryObject(Result.Response); CacheRecord.IterateAttachments([&Session, &Result, &Package](CbFieldView AttachmentHash) { CloudCacheResult AttachmentResult = Session.GetCompressedBlob(AttachmentHash.AsHash()); Result.Bytes += AttachmentResult.Bytes; Result.ElapsedSeconds += AttachmentResult.ElapsedSeconds; Result.ErrorCode = AttachmentResult.ErrorCode; if (CompressedBuffer Chunk = CompressedBuffer::FromCompressed(SharedBuffer(AttachmentResult.Response))) { Package.AddAttachment(CbAttachment(Chunk)); } else { Result.Success = false; } }); Package.SetObject(CacheRecord); } if (Result.Success) { BinaryWriter MemStream; Package.Save(MemStream); Result.Response = IoBuffer(IoBuffer::Clone, MemStream.Data(), MemStream.Size()); } } } m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason); if (Result.ErrorCode == 0) { return {.Value = Result.Response, .Bytes = Result.Bytes, .ElapsedSeconds = Result.ElapsedSeconds, .Success = Result.Success}; } else { return {.Error{.ErrorCode = Result.ErrorCode, .Reason = std::move(Result.Reason)}}; } } catch (std::exception& Err) { m_Status.Set(UpstreamEndpointState::kError, Err.what()); return {.Error{.ErrorCode = -1, .Reason = Err.what()}}; } } virtual GetUpstreamCacheResult GetCacheRecords(std::span CacheKeys, std::span KeyIndex, const CacheRecordPolicy& Policy, OnCacheRecordGetComplete&& OnComplete) override { ZEN_TRACE_CPU("Upstream::Horde::GetCacheRecords"); ZEN_UNUSED(Policy); CloudCacheSession Session(m_Client); GetUpstreamCacheResult Result; for (size_t Index : KeyIndex) { const CacheKey& CacheKey = CacheKeys[Index]; CbPackage Package; CbObject Record; if (!Result.Error) { CloudCacheResult RefResult = Session.GetRef(CacheKey.Bucket, CacheKey.Hash, ZenContentType::kCbObject); AppendResult(RefResult, Result); m_Status.SetFromErrorCode(RefResult.ErrorCode, RefResult.Reason); if (RefResult.ErrorCode == 0) { const CbValidateError ValidationResult = ValidateCompactBinary(RefResult.Response, CbValidateMode::All); if (ValidationResult == CbValidateError::None) { Record = LoadCompactBinaryObject(RefResult.Response); Record.IterateAttachments([this, &Session, &Result, &Package](CbFieldView AttachmentHash) { CloudCacheResult BlobResult = Session.GetCompressedBlob(AttachmentHash.AsHash()); AppendResult(BlobResult, Result); m_Status.SetFromErrorCode(BlobResult.ErrorCode, BlobResult.Reason); if (BlobResult.ErrorCode == 0) { if (CompressedBuffer Chunk = CompressedBuffer::FromCompressed(SharedBuffer(BlobResult.Response))) { Package.AddAttachment(CbAttachment(Chunk)); } } }); } } } OnComplete({.Key = CacheKey, .KeyIndex = Index, .Record = Record, .Package = Package}); } return Result; } virtual GetUpstreamCacheResult GetCacheValue(const CacheKey&, const IoHash& ValueContentId) override { ZEN_TRACE_CPU("Upstream::Horde::GetSingleCacheValue"); try { CloudCacheSession Session(m_Client); const CloudCacheResult Result = Session.GetCompressedBlob(ValueContentId); m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason); if (Result.ErrorCode == 0) { return {.Value = Result.Response, .Bytes = Result.Bytes, .ElapsedSeconds = Result.ElapsedSeconds, .Success = Result.Success}; } else { return {.Error{.ErrorCode = Result.ErrorCode, .Reason = std::move(Result.Reason)}}; } } catch (std::exception& Err) { m_Status.Set(UpstreamEndpointState::kError, Err.what()); return {.Error{.ErrorCode = -1, .Reason = Err.what()}}; } } virtual GetUpstreamCacheResult GetCacheValues(std::span CacheChunkRequests, std::span RequestIndex, OnCacheValueGetComplete&& OnComplete) override final { ZEN_TRACE_CPU("Upstream::Horde::GetCacheValues"); CloudCacheSession Session(m_Client); GetUpstreamCacheResult Result; for (size_t Index : RequestIndex) { const CacheChunkRequest& Request = CacheChunkRequests[Index]; IoBuffer Payload; if (!Result.Error) { const CloudCacheResult BlobResult = Session.GetCompressedBlob(Request.ChunkId); Payload = BlobResult.Response; AppendResult(BlobResult, Result); m_Status.SetFromErrorCode(BlobResult.ErrorCode, BlobResult.Reason); } OnComplete({.Request = Request, .RequestIndex = Index, .Value = Payload}); } return Result; } virtual PutUpstreamCacheResult PutCacheRecord(const UpstreamCacheRecord& CacheRecord, IoBuffer RecordValue, std::span Values) override { ZEN_TRACE_CPU("Upstream::Horde::PutCacheRecord"); ZEN_ASSERT(CacheRecord.ValueContentIds.size() == Values.size()); const int32_t MaxAttempts = 3; try { CloudCacheSession Session(m_Client); if (CacheRecord.Type == ZenContentType::kBinary) { CloudCacheResult Result; for (uint32_t Attempt = 0; Attempt < MaxAttempts && !Result.Success; Attempt++) { if (m_UseLegacyDdc) { Result = Session.PutDerivedData(CacheRecord.Key.Bucket, CacheRecord.Key.Hash, RecordValue); } else { Result = Session.PutRef(CacheRecord.Key.Bucket, CacheRecord.Key.Hash, RecordValue, ZenContentType::kBinary); } } m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason); return {.Reason = std::move(Result.Reason), .Bytes = Result.Bytes, .ElapsedSeconds = Result.ElapsedSeconds, .Success = Result.Success}; } else { int64_t TotalBytes = 0ull; double TotalElapsedSeconds = 0.0; const auto PutBlobs = [&](std::span ValueContentIds, std::string& OutReason) -> bool { for (const IoHash& ValueContentId : ValueContentIds) { const auto It = std::find(std::begin(CacheRecord.ValueContentIds), std::end(CacheRecord.ValueContentIds), ValueContentId); if (It == std::end(CacheRecord.ValueContentIds)) { OutReason = fmt::format("value '{}' MISSING from local cache", ValueContentId); return false; } const size_t Idx = std::distance(std::begin(CacheRecord.ValueContentIds), It); CloudCacheResult BlobResult; for (int32_t Attempt = 0; Attempt < MaxAttempts && !BlobResult.Success; Attempt++) { BlobResult = Session.PutCompressedBlob(CacheRecord.ValueContentIds[Idx], Values[Idx]); } m_Status.SetFromErrorCode(BlobResult.ErrorCode, BlobResult.Reason); if (!BlobResult.Success) { OutReason = fmt::format("upload value '{}' FAILED, reason '{}'", ValueContentId, BlobResult.Reason); return false; } TotalBytes += BlobResult.Bytes; TotalElapsedSeconds += BlobResult.ElapsedSeconds; } return true; }; PutRefResult RefResult; for (int32_t Attempt = 0; Attempt < MaxAttempts && !RefResult.Success; Attempt++) { RefResult = Session.PutRef(CacheRecord.Key.Bucket, CacheRecord.Key.Hash, RecordValue, ZenContentType::kCbObject); } m_Status.SetFromErrorCode(RefResult.ErrorCode, RefResult.Reason); if (!RefResult.Success) { return {.Reason = fmt::format("upload cache record '{}/{}' FAILED, reason '{}'", CacheRecord.Key.Bucket, CacheRecord.Key.Hash, RefResult.Reason), .Success = false}; } TotalBytes += RefResult.Bytes; TotalElapsedSeconds += RefResult.ElapsedSeconds; std::string Reason; if (!PutBlobs(RefResult.Needs, Reason)) { return {.Reason = std::move(Reason), .Success = false}; } const IoHash RefHash = IoHash::HashBuffer(RecordValue); FinalizeRefResult FinalizeResult = Session.FinalizeRef(CacheRecord.Key.Bucket, CacheRecord.Key.Hash, RefHash); m_Status.SetFromErrorCode(FinalizeResult.ErrorCode, FinalizeResult.Reason); if (!FinalizeResult.Success) { return {.Reason = fmt::format("finalize cache record '{}/{}' FAILED, reason '{}'", CacheRecord.Key.Bucket, CacheRecord.Key.Hash, FinalizeResult.Reason), .Success = false}; } if (!FinalizeResult.Needs.empty()) { if (!PutBlobs(FinalizeResult.Needs, Reason)) { return {.Reason = std::move(Reason), .Success = false}; } FinalizeResult = Session.FinalizeRef(CacheRecord.Key.Bucket, CacheRecord.Key.Hash, RefHash); m_Status.SetFromErrorCode(FinalizeResult.ErrorCode, FinalizeResult.Reason); if (!FinalizeResult.Success) { return {.Reason = fmt::format("finalize '{}/{}' FAILED, reason '{}'", CacheRecord.Key.Bucket, CacheRecord.Key.Hash, FinalizeResult.Reason), .Success = false}; } if (!FinalizeResult.Needs.empty()) { ExtendableStringBuilder<256> Sb; for (const IoHash& MissingHash : FinalizeResult.Needs) { Sb << MissingHash.ToHexString() << ","; } return {.Reason = fmt::format("finalize '{}/{}' FAILED, still needs value(s) '{}'", CacheRecord.Key.Bucket, CacheRecord.Key.Hash, Sb.ToString()), .Success = false}; } } TotalBytes += FinalizeResult.Bytes; TotalElapsedSeconds += FinalizeResult.ElapsedSeconds; return {.Bytes = TotalBytes, .ElapsedSeconds = TotalElapsedSeconds, .Success = true}; } } catch (std::exception& Err) { m_Status.Set(UpstreamEndpointState::kError, Err.what()); return {.Reason = std::string(Err.what()), .Success = false}; } } virtual UpstreamEndpointStats& Stats() override { return m_Stats; } private: static void AppendResult(const CloudCacheResult& Result, GetUpstreamCacheResult& Out) { Out.Success &= Result.Success; Out.Bytes += Result.Bytes; Out.ElapsedSeconds += Result.ElapsedSeconds; if (Result.ErrorCode) { Out.Error = {.ErrorCode = Result.ErrorCode, .Reason = std::move(Result.Reason)}; } }; spdlog::logger& Log() { return m_Log; } AuthMgr& m_AuthMgr; spdlog::logger& m_Log; UpstreamEndpointInfo m_Info; UpstreamStatus m_Status; UpstreamEndpointStats m_Stats; bool m_UseLegacyDdc; RefPtr m_Client; }; class ZenUpstreamEndpoint final : public UpstreamEndpoint { struct ZenEndpoint { std::string Url; std::string Reason; double Latency{}; bool Ok = false; bool operator<(const ZenEndpoint& RHS) const { return Ok && RHS.Ok ? Latency < RHS.Latency : Ok; } }; public: ZenUpstreamEndpoint(const ZenStructuredCacheClientOptions& Options) : m_Log(zen::logging::Get("upstream")) , m_ConnectTimeout(Options.ConnectTimeout) , m_Timeout(Options.Timeout) { ZEN_ASSERT(!Options.Name.empty()); m_Info.Name = Options.Name; for (const auto& Url : Options.Urls) { m_Endpoints.push_back({.Url = Url}); } } ~ZenUpstreamEndpoint() = default; virtual const UpstreamEndpointInfo& GetEndpointInfo() const override { return m_Info; } virtual UpstreamEndpointStatus Initialize() override { try { if (m_Status.EndpointState() == UpstreamEndpointState::kOk) { return {.State = UpstreamEndpointState::kOk}; } const ZenEndpoint& Ep = GetEndpoint(); m_Info.Url = Ep.Url; if (Ep.Ok) { m_Client = new ZenStructuredCacheClient({.Url = m_Info.Url, .ConnectTimeout = m_ConnectTimeout, .Timeout = m_Timeout}); m_Status.Set(UpstreamEndpointState::kOk); } else { m_Status.Set(UpstreamEndpointState::kError, Ep.Reason); } return m_Status.EndpointStatus(); } catch (std::exception& Err) { m_Status.Set(UpstreamEndpointState::kError, Err.what()); return {.Reason = Err.what(), .State = GetState()}; } } virtual UpstreamEndpointState GetState() override { return m_Status.EndpointState(); } virtual UpstreamEndpointStatus GetStatus() override { return m_Status.EndpointStatus(); } virtual GetUpstreamCacheResult GetCacheRecord(CacheKey CacheKey, ZenContentType Type) override { ZEN_TRACE_CPU("Upstream::Zen::GetSingleCacheRecord"); try { ZenStructuredCacheSession Session(*m_Client); const ZenCacheResult Result = Session.GetCacheRecord(CacheKey.Bucket, CacheKey.Hash, Type); m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason); if (Result.ErrorCode == 0) { return {.Value = Result.Response, .Bytes = Result.Bytes, .ElapsedSeconds = Result.ElapsedSeconds, .Success = Result.Success}; } else { return {.Error{.ErrorCode = Result.ErrorCode, .Reason = std::move(Result.Reason)}}; } } catch (std::exception& Err) { m_Status.Set(UpstreamEndpointState::kError, Err.what()); return {.Error{.ErrorCode = -1, .Reason = Err.what()}}; } } virtual GetUpstreamCacheResult GetCacheRecords(std::span CacheKeys, std::span KeyIndex, const CacheRecordPolicy& Policy, OnCacheRecordGetComplete&& OnComplete) override { ZEN_TRACE_CPU("Upstream::Zen::GetCacheRecords"); std::vector IndexMap; IndexMap.reserve(KeyIndex.size()); CbObjectWriter BatchRequest; BatchRequest << "Method"sv << "GetCacheRecords"; BatchRequest.BeginObject("Params"sv); { BatchRequest.BeginArray("CacheKeys"sv); for (size_t Index : KeyIndex) { const CacheKey& Key = CacheKeys[Index]; IndexMap.push_back(Index); BatchRequest.BeginObject(); BatchRequest << "Bucket"sv << Key.Bucket; BatchRequest << "Hash"sv << Key.Hash; BatchRequest.EndObject(); } BatchRequest.EndArray(); BatchRequest.SetName("Policy"sv); Policy.Save(BatchRequest); } BatchRequest.EndObject(); CbPackage BatchResponse; ZenCacheResult Result; { ZenStructuredCacheSession Session(*m_Client); Result = Session.InvokeRpc(BatchRequest.Save()); } m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason); if (Result.Success) { if (BatchResponse.TryLoad(Result.Response)) { for (size_t LocalIndex = 0; CbFieldView Record : BatchResponse.GetObject()["Result"sv]) { const size_t Index = IndexMap[LocalIndex++]; OnComplete({.Key = CacheKeys[Index], .KeyIndex = Index, .Record = Record.AsObjectView(), .Package = BatchResponse}); } return {.Bytes = Result.Bytes, .ElapsedSeconds = Result.ElapsedSeconds, .Success = true}; } } for (size_t Index : KeyIndex) { OnComplete({.Key = CacheKeys[Index], .KeyIndex = Index, .Record = CbObjectView(), .Package = CbPackage()}); } return {.Error{.ErrorCode = Result.ErrorCode, .Reason = std::move(Result.Reason)}}; } virtual GetUpstreamCacheResult GetCacheValue(const CacheKey& CacheKey, const IoHash& ValueContentId) override { ZEN_TRACE_CPU("Upstream::Zen::GetSingleCacheValue"); try { ZenStructuredCacheSession Session(*m_Client); const ZenCacheResult Result = Session.GetCacheValue(CacheKey.Bucket, CacheKey.Hash, ValueContentId); m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason); if (Result.ErrorCode == 0) { return {.Value = Result.Response, .Bytes = Result.Bytes, .ElapsedSeconds = Result.ElapsedSeconds, .Success = Result.Success}; } else { return {.Error{.ErrorCode = Result.ErrorCode, .Reason = std::move(Result.Reason)}}; } } catch (std::exception& Err) { m_Status.Set(UpstreamEndpointState::kError, Err.what()); return {.Error{.ErrorCode = -1, .Reason = Err.what()}}; } } virtual GetUpstreamCacheResult GetCacheValues(std::span CacheChunkRequests, std::span RequestIndex, OnCacheValueGetComplete&& OnComplete) override final { ZEN_TRACE_CPU("Upstream::Zen::GetCacheValues"); std::vector IndexMap; IndexMap.reserve(RequestIndex.size()); CbObjectWriter BatchRequest; BatchRequest << "Method"sv << "GetCacheValues"; BatchRequest.BeginObject("Params"sv); { BatchRequest.BeginArray("ChunkRequests"sv); { for (size_t Index : RequestIndex) { const CacheChunkRequest& Request = CacheChunkRequests[Index]; IndexMap.push_back(Index); BatchRequest.BeginObject(); { BatchRequest.BeginObject("Key"sv); BatchRequest << "Bucket"sv << Request.Key.Bucket; BatchRequest << "Hash"sv << Request.Key.Hash; BatchRequest.EndObject(); BatchRequest.AddObjectId("ValueId"sv, Request.ValueId); BatchRequest << "ChunkId"sv << Request.ChunkId; BatchRequest << "RawOffset"sv << Request.RawOffset; BatchRequest << "RawSize"sv << Request.RawSize; BatchRequest << "Policy"sv << WriteToString<128>(Request.Policy).ToView(); } BatchRequest.EndObject(); } } BatchRequest.EndArray(); } BatchRequest.EndObject(); CbPackage BatchResponse; ZenCacheResult Result; { ZenStructuredCacheSession Session(*m_Client); Result = Session.InvokeRpc(BatchRequest.Save()); } m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason); if (Result.Success) { if (BatchResponse.TryLoad(Result.Response)) { for (size_t LocalIndex = 0; CbFieldView AttachmentHash : BatchResponse.GetObject()["Result"sv]) { const size_t Index = IndexMap[LocalIndex++]; IoBuffer Payload; if (const CbAttachment* Attachment = BatchResponse.FindAttachment(AttachmentHash.AsHash())) { if (const CompressedBuffer& Compressed = Attachment->AsCompressedBinary()) { Payload = Compressed.GetCompressed().Flatten().AsIoBuffer(); } } OnComplete({.Request = CacheChunkRequests[Index], .RequestIndex = Index, .Value = std::move(Payload)}); } return {.Bytes = Result.Bytes, .ElapsedSeconds = Result.ElapsedSeconds, .Success = true}; } } for (size_t Index : RequestIndex) { OnComplete({.Request = CacheChunkRequests[Index], .RequestIndex = Index, .Value = IoBuffer()}); } return {.Error{.ErrorCode = Result.ErrorCode, .Reason = std::move(Result.Reason)}}; } virtual PutUpstreamCacheResult PutCacheRecord(const UpstreamCacheRecord& CacheRecord, IoBuffer RecordValue, std::span Values) override { ZEN_TRACE_CPU("Upstream::Zen::PutCacheRecord"); ZEN_ASSERT(CacheRecord.ValueContentIds.size() == Values.size()); const int32_t MaxAttempts = 3; try { ZenStructuredCacheSession Session(*m_Client); ZenCacheResult Result; int64_t TotalBytes = 0ull; double TotalElapsedSeconds = 0.0; if (CacheRecord.Type == ZenContentType::kCbPackage) { CbPackage Package; Package.SetObject(CbObject(SharedBuffer(RecordValue))); for (const IoBuffer& Value : Values) { if (CompressedBuffer AttachmentBuffer = CompressedBuffer::FromCompressed(SharedBuffer(Value))) { Package.AddAttachment(CbAttachment(AttachmentBuffer)); } else { return {.Reason = std::string("invalid value buffer"), .Success = false}; } } BinaryWriter MemStream; Package.Save(MemStream); IoBuffer PackagePayload(IoBuffer::Wrap, MemStream.Data(), MemStream.Size()); for (uint32_t Attempt = 0; Attempt < MaxAttempts && !Result.Success; Attempt++) { Result = Session.PutCacheRecord(CacheRecord.Key.Bucket, CacheRecord.Key.Hash, PackagePayload, CacheRecord.Type); } m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason); TotalBytes = Result.Bytes; TotalElapsedSeconds = Result.ElapsedSeconds; } else { for (size_t Idx = 0, Count = Values.size(); Idx < Count; Idx++) { Result.Success = false; for (uint32_t Attempt = 0; Attempt < MaxAttempts && !Result.Success; Attempt++) { Result = Session.PutCacheValue(CacheRecord.Key.Bucket, CacheRecord.Key.Hash, CacheRecord.ValueContentIds[Idx], Values[Idx]); } m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason); TotalBytes += Result.Bytes; TotalElapsedSeconds += Result.ElapsedSeconds; if (!Result.Success) { return {.Reason = "Failed to upload value", .Bytes = TotalBytes, .ElapsedSeconds = TotalElapsedSeconds, .Success = false}; } } Result.Success = false; for (uint32_t Attempt = 0; Attempt < MaxAttempts && !Result.Success; Attempt++) { Result = Session.PutCacheRecord(CacheRecord.Key.Bucket, CacheRecord.Key.Hash, RecordValue, CacheRecord.Type); } m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason); TotalBytes += Result.Bytes; TotalElapsedSeconds += Result.ElapsedSeconds; } return {.Reason = std::move(Result.Reason), .Bytes = TotalBytes, .ElapsedSeconds = TotalElapsedSeconds, .Success = Result.Success}; } catch (std::exception& Err) { m_Status.Set(UpstreamEndpointState::kError, Err.what()); return {.Reason = std::string(Err.what()), .Success = false}; } } virtual UpstreamEndpointStats& Stats() override { return m_Stats; } private: const ZenEndpoint& GetEndpoint() { for (ZenEndpoint& Ep : m_Endpoints) { ZenStructuredCacheClient Client({.Url = Ep.Url, .ConnectTimeout = std::chrono::milliseconds(1000)}); ZenStructuredCacheSession Session(Client); const int32_t SampleCount = 2; Ep.Ok = false; Ep.Latency = {}; for (int32_t Sample = 0; Sample < SampleCount; ++Sample) { ZenCacheResult Result = Session.CheckHealth(); Ep.Ok = Result.Success; Ep.Reason = std::move(Result.Reason); Ep.Latency += Result.ElapsedSeconds; } Ep.Latency /= double(SampleCount); } std::sort(std::begin(m_Endpoints), std::end(m_Endpoints)); for (const auto& Ep : m_Endpoints) { ZEN_INFO("ping 'Zen' endpoint '{}' latency '{:.3}s' {}", Ep.Url, Ep.Latency, Ep.Ok ? "OK" : Ep.Reason); } return m_Endpoints.front(); } spdlog::logger& Log() { return m_Log; } spdlog::logger& m_Log; UpstreamEndpointInfo m_Info; UpstreamStatus m_Status; UpstreamEndpointStats m_Stats; std::vector m_Endpoints; std::chrono::milliseconds m_ConnectTimeout; std::chrono::milliseconds m_Timeout; RefPtr m_Client; }; } // namespace detail ////////////////////////////////////////////////////////////////////////// class DefaultUpstreamCache final : public UpstreamCache { public: DefaultUpstreamCache(const UpstreamCacheOptions& Options, ZenCacheStore& CacheStore, CidStore& CidStore) : m_Log(logging::Get("upstream")) , m_Options(Options) , m_CacheStore(CacheStore) , m_CidStore(CidStore) { } virtual ~DefaultUpstreamCache() { Shutdown(); } virtual void Initialize() override { for (uint32_t Idx = 0; Idx < m_Options.ThreadCount; Idx++) { m_UpstreamThreads.emplace_back(&DefaultUpstreamCache::ProcessUpstreamQueue, this); } m_EndpointMonitorThread = std::thread(&DefaultUpstreamCache::MonitorEndpoints, this); m_RunState.IsRunning = true; } virtual void RegisterEndpoint(std::unique_ptr Endpoint) override { const UpstreamEndpointStatus Status = Endpoint->Initialize(); const UpstreamEndpointInfo& Info = Endpoint->GetEndpointInfo(); ZEN_INFO("register endpoint '{} - {}' {}", Info.Name, Info.Url, ToString(Status.State)); // Register endpoint even if it fails, the health monitor thread will probe failing endpoint(s) std::unique_lock _(m_EndpointsMutex); m_Endpoints.emplace_back(std::move(Endpoint)); } virtual void IterateEndpoints(std::function&& Fn) override { std::shared_lock _(m_EndpointsMutex); for (auto& Ep : m_Endpoints) { if (!Fn(*Ep)) { break; } } } virtual GetUpstreamCacheResult GetCacheRecord(CacheKey CacheKey, ZenContentType Type) override { ZEN_TRACE_CPU("Upstream::GetCacheRecord"); std::shared_lock _(m_EndpointsMutex); if (m_Options.ReadUpstream) { for (auto& Endpoint : m_Endpoints) { if (Endpoint->GetState() != UpstreamEndpointState::kOk) { continue; } UpstreamEndpointStats& Stats = Endpoint->Stats(); GetUpstreamCacheResult Result; { metrics::OperationTiming::Scope Scope(Stats.CacheGetRequestTiming); Result = Endpoint->GetCacheRecord(CacheKey, Type); } Stats.CacheGetCount.Increment(1); Stats.CacheGetTotalBytes.Increment(Result.Bytes); if (Result.Success) { Stats.CacheHitCount.Increment(1); return Result; } if (Result.Error) { Stats.CacheErrorCount.Increment(1); ZEN_ERROR("get cache record FAILED, endpoint '{}', reason '{}', error code '{}'", Endpoint->GetEndpointInfo().Url, Result.Error.Reason, Result.Error.ErrorCode); } } } return {}; } virtual void GetCacheRecords(std::span CacheKeys, std::span KeyIndex, const CacheRecordPolicy& DownstreamPolicy, OnCacheRecordGetComplete&& OnComplete) override final { ZEN_TRACE_CPU("Upstream::GetCacheRecords"); std::shared_lock _(m_EndpointsMutex); std::vector RemainingKeys(KeyIndex.begin(), KeyIndex.end()); if (m_Options.ReadUpstream) { CacheRecordPolicy UpstreamPolicy = DownstreamPolicy.ConvertToUpstream(); for (auto& Endpoint : m_Endpoints) { if (RemainingKeys.empty()) { break; } if (Endpoint->GetState() != UpstreamEndpointState::kOk) { continue; } UpstreamEndpointStats& Stats = Endpoint->Stats(); std::vector Missing; GetUpstreamCacheResult Result; { metrics::OperationTiming::Scope Scope(Stats.CacheGetRequestTiming); Result = Endpoint->GetCacheRecords(CacheKeys, RemainingKeys, UpstreamPolicy, [&](CacheRecordGetCompleteParams&& Params) { if (Params.Record) { OnComplete(std::forward(Params)); Stats.CacheHitCount.Increment(1); } else { Missing.push_back(Params.KeyIndex); } }); } Stats.CacheGetCount.Increment(int64_t(RemainingKeys.size())); Stats.CacheGetTotalBytes.Increment(Result.Bytes); if (Result.Error) { Stats.CacheErrorCount.Increment(1); ZEN_ERROR("get cache record(s) (rpc) FAILED, endpoint '{}', reason '{}', error code '{}'", Endpoint->GetEndpointInfo().Url, Result.Error.Reason, Result.Error.ErrorCode); } RemainingKeys = std::move(Missing); } } for (size_t Index : RemainingKeys) { OnComplete({.Key = CacheKeys[Index], .KeyIndex = Index, .Record = CbObjectView(), .Package = CbPackage()}); } } virtual void GetCacheValues(std::span CacheChunkRequests, std::span RequestIndex, OnCacheValueGetComplete&& OnComplete) override final { ZEN_TRACE_CPU("Upstream::GetCacheValues"); std::shared_lock _(m_EndpointsMutex); std::vector RemainingKeys(RequestIndex.begin(), RequestIndex.end()); if (m_Options.ReadUpstream) { for (auto& Endpoint : m_Endpoints) { if (RemainingKeys.empty()) { break; } if (Endpoint->GetState() != UpstreamEndpointState::kOk) { continue; } UpstreamEndpointStats& Stats = Endpoint->Stats(); std::vector Missing; GetUpstreamCacheResult Result; { metrics::OperationTiming::Scope Scope(Endpoint->Stats().CacheGetRequestTiming); Result = Endpoint->GetCacheValues(CacheChunkRequests, RemainingKeys, [&](CacheValueGetCompleteParams&& Params) { if (Params.Value) { OnComplete(std::forward(Params)); Stats.CacheHitCount.Increment(1); } else { Missing.push_back(Params.RequestIndex); } }); } Stats.CacheGetCount.Increment(int64_t(RemainingKeys.size())); Stats.CacheGetTotalBytes.Increment(Result.Bytes); if (Result.Error) { Stats.CacheErrorCount.Increment(1); ZEN_ERROR("get cache values(s) (rpc) FAILED, endpoint '{}', reason '{}', error code '{}'", Endpoint->GetEndpointInfo().Url, Result.Error.Reason, Result.Error.ErrorCode); } RemainingKeys = std::move(Missing); } } for (size_t Index : RemainingKeys) { OnComplete({.Request = CacheChunkRequests[Index], .RequestIndex = Index, .Value = IoBuffer()}); } } virtual GetUpstreamCacheResult GetCacheValue(const CacheKey& CacheKey, const IoHash& ValueContentId) override { ZEN_TRACE_CPU("Upstream::GetCacheValue"); if (m_Options.ReadUpstream) { for (auto& Endpoint : m_Endpoints) { if (Endpoint->GetState() != UpstreamEndpointState::kOk) { continue; } UpstreamEndpointStats& Stats = Endpoint->Stats(); GetUpstreamCacheResult Result; { metrics::OperationTiming::Scope Scope(Stats.CacheGetRequestTiming); Result = Endpoint->GetCacheValue(CacheKey, ValueContentId); } Stats.CacheGetCount.Increment(1); Stats.CacheGetTotalBytes.Increment(Result.Bytes); if (Result.Success) { Stats.CacheHitCount.Increment(1); return Result; } if (Result.Error) { Stats.CacheErrorCount.Increment(1); ZEN_ERROR("get cache value FAILED, endpoint '{}', reason '{}', error code '{}'", Endpoint->GetEndpointInfo().Url, Result.Error.Reason, Result.Error.ErrorCode); } } } return {}; } virtual void EnqueueUpstream(UpstreamCacheRecord CacheRecord) override { if (m_RunState.IsRunning && m_Options.WriteUpstream) { if (!m_UpstreamThreads.empty()) { m_UpstreamQueue.Enqueue(std::move(CacheRecord)); } else { ProcessCacheRecord(std::move(CacheRecord)); } } } virtual void GetStatus(CbObjectWriter& Status) override { Status << "reading" << m_Options.ReadUpstream; Status << "writing" << m_Options.WriteUpstream; Status << "worker_threads" << m_Options.ThreadCount; Status << "queue_count" << m_UpstreamQueue.Size(); Status.BeginArray("endpoints"); for (const auto& Ep : m_Endpoints) { const UpstreamEndpointInfo& EpInfo = Ep->GetEndpointInfo(); const UpstreamEndpointStatus EpStatus = Ep->GetStatus(); UpstreamEndpointStats& EpStats = Ep->Stats(); Status.BeginObject(); Status << "name" << EpInfo.Name; Status << "url" << EpInfo.Url; Status << "state" << ToString(EpStatus.State); Status << "reason" << EpStatus.Reason; Status.BeginObject("cache"sv); { const int64_t GetCount = EpStats.CacheGetCount.Value(); const int64_t HitCount = EpStats.CacheHitCount.Value(); const int64_t ErrorCount = EpStats.CacheErrorCount.Value(); const double HitRatio = GetCount > 0 ? double(HitCount) / double(GetCount) : 0.0; const double ErrorRatio = GetCount > 0 ? double(ErrorCount) / double(GetCount) : 0.0; metrics::EmitSnapshot("get_requests"sv, EpStats.CacheGetRequestTiming, Status); Status << "get_bytes" << EpStats.CacheGetTotalBytes.Value(); Status << "get_count" << GetCount; Status << "hit_count" << HitCount; Status << "hit_ratio" << HitRatio; Status << "error_count" << ErrorCount; Status << "error_ratio" << ErrorRatio; metrics::EmitSnapshot("put_requests"sv, EpStats.CachePutRequestTiming, Status); Status << "put_bytes" << EpStats.CachePutTotalBytes.Value(); } Status.EndObject(); Status.EndObject(); } Status.EndArray(); } private: void ProcessCacheRecord(UpstreamCacheRecord CacheRecord) { ZEN_TRACE_CPU("Upstream::ProcessCacheRecord"); ZenCacheValue CacheValue; std::vector Payloads; if (!m_CacheStore.Get(CacheRecord.Key.Bucket, CacheRecord.Key.Hash, CacheValue)) { ZEN_WARN("process upstream FAILED, '{}/{}', cache record doesn't exist", CacheRecord.Key.Bucket, CacheRecord.Key.Hash); return; } for (const IoHash& ValueContentId : CacheRecord.ValueContentIds) { if (IoBuffer Payload = m_CidStore.FindChunkByCid(ValueContentId)) { Payloads.push_back(Payload); } else { ZEN_WARN("process upstream FAILED, '{}/{}/{}', ValueContentId doesn't exist in CAS", CacheRecord.Key.Bucket, CacheRecord.Key.Hash, ValueContentId); return; } } std::shared_lock _(m_EndpointsMutex); for (auto& Endpoint : m_Endpoints) { if (Endpoint->GetState() != UpstreamEndpointState::kOk) { continue; } UpstreamEndpointStats& Stats = Endpoint->Stats(); PutUpstreamCacheResult Result; { metrics::OperationTiming::Scope Scope(Stats.CachePutRequestTiming); Result = Endpoint->PutCacheRecord(CacheRecord, CacheValue.Value, std::span(Payloads)); } Stats.CachePutTotalBytes.Increment(Result.Bytes); if (!Result.Success) { ZEN_WARN("upload cache record '{}/{}' FAILED, endpoint '{}', reason '{}'", CacheRecord.Key.Bucket, CacheRecord.Key.Hash, Endpoint->GetEndpointInfo().Url, Result.Reason); } } } void ProcessUpstreamQueue() { for (;;) { UpstreamCacheRecord CacheRecord; if (m_UpstreamQueue.WaitAndDequeue(CacheRecord)) { try { ProcessCacheRecord(std::move(CacheRecord)); } catch (std::exception& Err) { ZEN_ERROR("upload cache record '{}/{}' FAILED, reason '{}'", CacheRecord.Key.Bucket, CacheRecord.Key.Hash, Err.what()); } } if (!m_RunState.IsRunning) { break; } } } void MonitorEndpoints() { for (;;) { { std::unique_lock lk(m_RunState.Mutex); if (m_RunState.ExitSignal.wait_for(lk, m_Options.HealthCheckInterval, [this]() { return !m_RunState.IsRunning.load(); })) { break; } } try { std::vector Endpoints; { std::shared_lock _(m_EndpointsMutex); for (auto& Endpoint : m_Endpoints) { if (Endpoint->GetState() == UpstreamEndpointState::kError || Endpoint->GetState() == UpstreamEndpointState::kUnauthorized) { Endpoints.push_back(Endpoint.get()); } } } for (auto& Endpoint : Endpoints) { const UpstreamEndpointInfo& Info = Endpoint->GetEndpointInfo(); const UpstreamEndpointStatus Status = Endpoint->Initialize(); if (Status.State == UpstreamEndpointState::kOk) { ZEN_INFO("health check endpoint '{} - {}' OK", Info.Name, Info.Url); } else { ZEN_WARN("health check endpoint '{} - {}' FAILED, reason '{}'", Info.Name, Info.Url, Status.Reason); } } } catch (std::exception& Err) { ZEN_ERROR("check endpoint(s) health FAILED, reason '{}'", Err.what()); } } } void Shutdown() { if (m_RunState.Stop()) { m_UpstreamQueue.CompleteAdding(); for (std::thread& Thread : m_UpstreamThreads) { Thread.join(); } m_EndpointMonitorThread.join(); m_UpstreamThreads.clear(); m_Endpoints.clear(); } } spdlog::logger& Log() { return m_Log; } using UpstreamQueue = BlockingQueue; struct RunState { std::mutex Mutex; std::condition_variable ExitSignal; std::atomic_bool IsRunning{false}; bool Stop() { bool Stopped = false; { std::lock_guard _(Mutex); Stopped = IsRunning.exchange(false); } if (Stopped) { ExitSignal.notify_all(); } return Stopped; } }; spdlog::logger& m_Log; UpstreamCacheOptions m_Options; ZenCacheStore& m_CacheStore; CidStore& m_CidStore; UpstreamQueue m_UpstreamQueue; std::shared_mutex m_EndpointsMutex; std::vector> m_Endpoints; std::vector m_UpstreamThreads; std::thread m_EndpointMonitorThread; RunState m_RunState; }; ////////////////////////////////////////////////////////////////////////// std::unique_ptr MakeUpstreamCache(const UpstreamCacheOptions& Options, ZenCacheStore& CacheStore, CidStore& CidStore) { return std::make_unique(Options, CacheStore, CidStore); } std::unique_ptr MakeJupiterUpstreamEndpoint(const CloudCacheClientOptions& Options, AuthMgr& Mgr) { return std::make_unique(Options, Mgr); } std::unique_ptr MakeZenUpstreamEndpoint(const ZenStructuredCacheClientOptions& Options) { return std::make_unique(Options); } } // namespace zen