diff options
| author | Liam Mitchell <[email protected]> | 2026-03-09 19:06:36 -0700 |
|---|---|---|
| committer | Liam Mitchell <[email protected]> | 2026-03-09 19:06:36 -0700 |
| commit | d1abc50ee9d4fb72efc646e17decafea741caa34 (patch) | |
| tree | e4288e00f2f7ca0391b83d986efcb69d3ba66a83 /src/zenremotestore | |
| parent | Allow requests with invalid content-types unless specified in command line or... (diff) | |
| parent | updated chunk–block analyser (#818) (diff) | |
| download | zen-d1abc50ee9d4fb72efc646e17decafea741caa34.tar.xz zen-d1abc50ee9d4fb72efc646e17decafea741caa34.zip | |
Merge branch 'main' into lm/restrict-content-type
Diffstat (limited to 'src/zenremotestore')
34 files changed, 5356 insertions, 1867 deletions
diff --git a/src/zenremotestore/builds/buildmanifest.cpp b/src/zenremotestore/builds/buildmanifest.cpp index 051436e96..738e4b33b 100644 --- a/src/zenremotestore/builds/buildmanifest.cpp +++ b/src/zenremotestore/builds/buildmanifest.cpp @@ -97,6 +97,8 @@ ParseBuildManifest(const std::filesystem::path& ManifestPath) } #if ZEN_WITH_TESTS +TEST_SUITE_BEGIN("remotestore.buildmanifest"); + TEST_CASE("buildmanifest.unstructured") { ScopedTemporaryDirectory Root; @@ -163,6 +165,8 @@ TEST_CASE("buildmanifest.structured") CHECK_EQ(Manifest.Parts[1].Files[0].generic_string(), "baz.pdb"); } +TEST_SUITE_END(); + void buildmanifest_forcelink() { diff --git a/src/zenremotestore/builds/buildsavedstate.cpp b/src/zenremotestore/builds/buildsavedstate.cpp index 1d1f4605f..0685bf679 100644 --- a/src/zenremotestore/builds/buildsavedstate.cpp +++ b/src/zenremotestore/builds/buildsavedstate.cpp @@ -588,6 +588,8 @@ namespace buildsavestate_test { } } // namespace buildsavestate_test +TEST_SUITE_BEGIN("remotestore.buildsavedstate"); + TEST_CASE("buildsavestate.BuildsSelection") { using namespace buildsavestate_test; @@ -696,6 +698,8 @@ TEST_CASE("buildsavestate.DownloadedPaths") } } +TEST_SUITE_END(); + #endif // ZEN_WITH_TESTS } // namespace zen diff --git a/src/zenremotestore/builds/buildstoragecache.cpp b/src/zenremotestore/builds/buildstoragecache.cpp index 07fcd62ba..00765903d 100644 --- a/src/zenremotestore/builds/buildstoragecache.cpp +++ b/src/zenremotestore/builds/buildstoragecache.cpp @@ -151,7 +151,7 @@ public: auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); HttpClient::Response CacheResponse = - m_HttpClient.Upload(fmt::format("/builds/{}/{}/{}/blobs/{}", m_Namespace, m_Bucket, BuildId, RawHash.ToHexString()), + m_HttpClient.Upload(fmt::format("/builds/{}/{}/{}/blobs/{}", m_Namespace, m_Bucket, BuildId, RawHash), Payload, ContentType); @@ -180,7 +180,7 @@ public: } CreateDirectories(m_TempFolderPath); HttpClient::Response 
CacheResponse = - m_HttpClient.Download(fmt::format("/builds/{}/{}/{}/blobs/{}", m_Namespace, m_Bucket, BuildId, RawHash.ToHexString()), + m_HttpClient.Download(fmt::format("/builds/{}/{}/{}/blobs/{}", m_Namespace, m_Bucket, BuildId, RawHash), m_TempFolderPath, Headers); AddStatistic(CacheResponse); @@ -191,6 +191,74 @@ public: return {}; } + virtual BuildBlobRanges GetBuildBlobRanges(const Oid& BuildId, + const IoHash& RawHash, + std::span<const std::pair<uint64_t, uint64_t>> Ranges) override + { + ZEN_TRACE_CPU("ZenBuildStorageCache::GetBuildBlobRanges"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + + CbObjectWriter Writer; + Writer.BeginArray("ranges"sv); + { + for (const std::pair<uint64_t, uint64_t>& Range : Ranges) + { + Writer.BeginObject(); + { + Writer.AddInteger("offset"sv, Range.first); + Writer.AddInteger("length"sv, Range.second); + } + Writer.EndObject(); + } + } + Writer.EndArray(); // ranges + + CreateDirectories(m_TempFolderPath); + HttpClient::Response CacheResponse = + m_HttpClient.Post(fmt::format("/builds/{}/{}/{}/blobs/{}", m_Namespace, m_Bucket, BuildId, RawHash), + Writer.Save(), + HttpClient::Accept(ZenContentType::kCbPackage)); + AddStatistic(CacheResponse); + if (CacheResponse.IsSuccess()) + { + CbPackage ResponsePackage = ParsePackageMessage(CacheResponse.ResponsePayload); + CbObjectView ResponseObject = ResponsePackage.GetObject(); + + CbArrayView RangeArray = ResponseObject["ranges"sv].AsArrayView(); + + std::vector<std::pair<uint64_t, uint64_t>> ReceivedRanges; + ReceivedRanges.reserve(RangeArray.Num()); + + uint64_t OffsetInPayloadRanges = 0; + + for (CbFieldView View : RangeArray) + { + CbObjectView RangeView = View.AsObjectView(); + uint64_t Offset = RangeView["offset"sv].AsUInt64(); + uint64_t Length = RangeView["length"sv].AsUInt64(); + + const std::pair<uint64_t, uint64_t>& Range = Ranges[ReceivedRanges.size()]; + + if (Offset != Range.first 
|| Length != Range.second) + { + return {}; + } + ReceivedRanges.push_back(std::make_pair(OffsetInPayloadRanges, Length)); + OffsetInPayloadRanges += Length; + } + + const CbAttachment* DataAttachment = ResponsePackage.FindAttachment(RawHash); + if (DataAttachment) + { + SharedBuffer PayloadRanges = DataAttachment->AsBinary(); + return BuildBlobRanges{.PayloadBuffer = PayloadRanges.AsIoBuffer(), .Ranges = std::move(ReceivedRanges)}; + } + } + return {}; + } + virtual void PutBlobMetadatas(const Oid& BuildId, std::span<const IoHash> BlobHashes, std::span<const CbObject> MetaDatas) override { ZEN_ASSERT(!IsFlushed); @@ -460,6 +528,192 @@ CreateZenBuildStorageCache(HttpClient& HttpClient, return std::make_unique<ZenBuildStorageCache>(HttpClient, Stats, Namespace, Bucket, TempFolderPath, BackgroundWorkerPool); } +#if ZEN_WITH_TESTS + +class InMemoryBuildStorageCache : public BuildStorageCache +{ +public: + // MaxRangeSupported == 0 : no range requests are accepted, always return full blob + // MaxRangeSupported == 1 : single range is supported, multi range returns full blob + // MaxRangeSupported > 1 : multirange is supported up to MaxRangeSupported, more ranges returns empty blob (bad request) + explicit InMemoryBuildStorageCache(uint64_t MaxRangeSupported, + BuildStorageCache::Statistics& Stats, + double LatencySec = 0.0, + double DelayPerKBSec = 0.0) + : m_MaxRangeSupported(MaxRangeSupported) + , m_Stats(Stats) + , m_LatencySec(LatencySec) + , m_DelayPerKBSec(DelayPerKBSec) + { + } + void PutBuildBlob(const Oid&, const IoHash& RawHash, ZenContentType, const CompositeBuffer& Payload) override + { + IoBuffer Buf = Payload.Flatten().AsIoBuffer(); + Buf.MakeOwned(); + const uint64_t SentBytes = Buf.Size(); + uint64_t ReceivedBytes = 0; + SimulateLatency(SentBytes, 0); + auto _ = MakeGuard([&]() { SimulateLatency(0, ReceivedBytes); }); + Stopwatch ExecutionTimer; + auto __ = MakeGuard([&]() { AddStatistic(ExecutionTimer.GetElapsedTimeUs(), ReceivedBytes, SentBytes); }); 
+ { + std::lock_guard Lock(m_Mutex); + m_Entries[RawHash] = std::move(Buf); + } + m_Stats.PutBlobCount.fetch_add(1); + m_Stats.PutBlobByteCount.fetch_add(SentBytes); + } + + IoBuffer GetBuildBlob(const Oid&, const IoHash& RawHash, uint64_t RangeOffset = 0, uint64_t RangeBytes = (uint64_t)-1) override + { + uint64_t SentBytes = 0; + uint64_t ReceivedBytes = 0; + SimulateLatency(SentBytes, 0); + auto _ = MakeGuard([&]() { SimulateLatency(0, ReceivedBytes); }); + Stopwatch ExecutionTimer; + auto __ = MakeGuard([&]() { AddStatistic(ExecutionTimer.GetElapsedTimeUs(), ReceivedBytes, SentBytes); }); + IoBuffer FullPayload; + { + std::lock_guard Lock(m_Mutex); + auto It = m_Entries.find(RawHash); + if (It == m_Entries.end()) + { + return {}; + } + FullPayload = It->second; + } + + if (RangeOffset != 0 || RangeBytes != (uint64_t)-1) + { + if (m_MaxRangeSupported == 0) + { + ReceivedBytes = FullPayload.Size(); + return FullPayload; + } + else + { + ReceivedBytes = (RangeBytes == (uint64_t)-1) ? FullPayload.Size() - RangeOffset : RangeBytes; + return IoBuffer(FullPayload, RangeOffset, RangeBytes); + } + } + else + { + ReceivedBytes = FullPayload.Size(); + return FullPayload; + } + } + + BuildBlobRanges GetBuildBlobRanges(const Oid&, const IoHash& RawHash, std::span<const std::pair<uint64_t, uint64_t>> Ranges) override + { + ZEN_ASSERT(!Ranges.empty()); + uint64_t SentBytes = 0; + uint64_t ReceivedBytes = 0; + SimulateLatency(SentBytes, 0); + auto _ = MakeGuard([&]() { SimulateLatency(0, ReceivedBytes); }); + Stopwatch ExecutionTimer; + auto __ = MakeGuard([&]() { AddStatistic(ExecutionTimer.GetElapsedTimeUs(), ReceivedBytes, SentBytes); }); + if (m_MaxRangeSupported > 1 && Ranges.size() > m_MaxRangeSupported) + { + return {}; + } + IoBuffer FullPayload; + { + std::lock_guard Lock(m_Mutex); + auto It = m_Entries.find(RawHash); + if (It == m_Entries.end()) + { + return {}; + } + FullPayload = It->second; + } + + if (Ranges.size() > m_MaxRangeSupported) + { + // An empty Ranges 
signals to the caller: "full buffer given, use it for all requested ranges". + ReceivedBytes = FullPayload.Size(); + return {.PayloadBuffer = FullPayload}; + } + else + { + uint64_t PayloadStart = Ranges.front().first; + uint64_t PayloadSize = Ranges.back().first + Ranges.back().second - PayloadStart; + IoBuffer RangeBuffer = IoBuffer(FullPayload, PayloadStart, PayloadSize); + std::vector<std::pair<uint64_t, uint64_t>> PayloadRanges; + PayloadRanges.reserve(Ranges.size()); + for (const std::pair<uint64_t, uint64_t>& Range : Ranges) + { + PayloadRanges.push_back(std::make_pair(Range.first - PayloadStart, Range.second)); + } + ReceivedBytes = PayloadSize; + return {.PayloadBuffer = RangeBuffer, .Ranges = std::move(PayloadRanges)}; + } + } + + void PutBlobMetadatas(const Oid&, std::span<const IoHash>, std::span<const CbObject>) override {} + + std::vector<CbObject> GetBlobMetadatas(const Oid&, std::span<const IoHash> Hashes) override + { + return std::vector<CbObject>(Hashes.size()); + } + + std::vector<BlobExistsResult> BlobsExists(const Oid&, std::span<const IoHash> Hashes) override + { + std::lock_guard Lock(m_Mutex); + std::vector<BlobExistsResult> Result; + Result.reserve(Hashes.size()); + for (const IoHash& Hash : Hashes) + { + auto It = m_Entries.find(Hash); + Result.push_back({.HasBody = (It != m_Entries.end() && It->second)}); + } + return Result; + } + + void Flush(int32_t, std::function<bool(intptr_t)>&&) override {} + +private: + void AddStatistic(uint64_t ElapsedTimeUs, uint64_t ReceivedBytes, uint64_t SentBytes) + { + m_Stats.TotalBytesWritten += SentBytes; + m_Stats.TotalBytesRead += ReceivedBytes; + m_Stats.TotalExecutionTimeUs += ElapsedTimeUs; + m_Stats.TotalRequestCount++; + SetAtomicMax(m_Stats.PeakSentBytes, SentBytes); + SetAtomicMax(m_Stats.PeakReceivedBytes, ReceivedBytes); + if (ElapsedTimeUs > 0) + { + SetAtomicMax(m_Stats.PeakBytesPerSec, (ReceivedBytes + SentBytes) * 1000000 / ElapsedTimeUs); + } + } + + void SimulateLatency(uint64_t 
SendBytes, uint64_t ReceiveBytes) + { + double SleepSec = m_LatencySec; + if (m_DelayPerKBSec > 0.0) + { + SleepSec += m_DelayPerKBSec * (double(SendBytes + ReceiveBytes) / 1024u); + } + if (SleepSec > 0) + { + Sleep(int(SleepSec * 1000)); + } + } + + uint64_t m_MaxRangeSupported = 0; + BuildStorageCache::Statistics& m_Stats; + const double m_LatencySec = 0.0; + const double m_DelayPerKBSec = 0.0; + std::mutex m_Mutex; + std::unordered_map<IoHash, IoBuffer, IoHash::Hasher> m_Entries; +}; + +std::unique_ptr<BuildStorageCache> +CreateInMemoryBuildStorageCache(uint64_t MaxRangeSupported, BuildStorageCache::Statistics& Stats, double LatencySec, double DelayPerKBSec) +{ + return std::make_unique<InMemoryBuildStorageCache>(MaxRangeSupported, Stats, LatencySec, DelayPerKBSec); +} +#endif // ZEN_WITH_TESTS + ZenCacheEndpointTestResult TestZenCacheEndpoint(std::string_view BaseUrl, const bool AssumeHttp2, const bool HttpVerbose) { @@ -474,9 +728,28 @@ TestZenCacheEndpoint(std::string_view BaseUrl, const bool AssumeHttp2, const boo HttpClient::Response TestResponse = TestHttpClient.Get("/status/builds"); if (TestResponse.IsSuccess()) { - return {.Success = true}; + uint64_t MaxRangeCountPerRequest = 1; + CbObject StatusResponse = TestResponse.AsObject(); + if (StatusResponse["ok"].AsBool()) + { + MaxRangeCountPerRequest = StatusResponse["capabilities"].AsObjectView()["maxrangecountperrequest"].AsUInt64(1); + + LatencyTestResult LatencyResult = MeasureLatency(TestHttpClient, "/health"); + + if (!LatencyResult.Success) + { + return {.Success = false, .FailureReason = LatencyResult.FailureReason}; + } + + return {.Success = true, .LatencySeconds = LatencyResult.LatencySeconds, .MaxRangeCountPerRequest = MaxRangeCountPerRequest}; + } + else + { + return {.Success = false, + .FailureReason = fmt::format("ZenCache endpoint {}/status/builds did not respond with \"ok\"", BaseUrl)}; + } } return {.Success = false, .FailureReason = TestResponse.ErrorMessage("")}; -}; +} } // namespace 
zen diff --git a/src/zenremotestore/builds/buildstorageoperations.cpp b/src/zenremotestore/builds/buildstorageoperations.cpp index 2319ad66d..f4b167b73 100644 --- a/src/zenremotestore/builds/buildstorageoperations.cpp +++ b/src/zenremotestore/builds/buildstorageoperations.cpp @@ -38,6 +38,7 @@ ZEN_THIRD_PARTY_INCLUDES_END #if ZEN_WITH_TESTS # include <zencore/testing.h> # include <zencore/testutils.h> +# include <zenhttp/httpclientauth.h> # include <zenremotestore/builds/filebuildstorage.h> #endif // ZEN_WITH_TESTS @@ -484,24 +485,6 @@ private: uint64_t FilteredPerSecond = 0; }; -EPartialBlockRequestMode -PartialBlockRequestModeFromString(const std::string_view ModeString) -{ - switch (HashStringAsLowerDjb2(ModeString)) - { - case HashStringDjb2("false"): - return EPartialBlockRequestMode::Off; - case HashStringDjb2("zencacheonly"): - return EPartialBlockRequestMode::ZenCacheOnly; - case HashStringDjb2("mixed"): - return EPartialBlockRequestMode::Mixed; - case HashStringDjb2("true"): - return EPartialBlockRequestMode::All; - default: - return EPartialBlockRequestMode::Invalid; - } -} - std::filesystem::path ZenStateFilePath(const std::filesystem::path& ZenFolderPath) { @@ -579,13 +562,6 @@ BuildsOperationUpdateFolder::Execute(FolderContent& OutLocalFolderState) CreateDirectories(m_TempDownloadFolderPath); CreateDirectories(m_TempBlockFolderPath); - Stopwatch IndexTimer; - - if (!m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, "Indexed local and remote content in {}", NiceTimeSpanMs(IndexTimer.GetElapsedTimeMs())); - } - Stopwatch CacheMappingTimer; std::vector<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters(m_RemoteContent.ChunkedContent.SequenceRawHashes.size()); @@ -906,343 +882,240 @@ BuildsOperationUpdateFolder::Execute(FolderContent& OutLocalFolderState) CheckRequiredDiskSpace(RemotePathToRemoteIndex); + BlobsExistsResult ExistsResult; { - ZEN_TRACE_CPU("WriteChunks"); - - 
m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::WriteChunks, (uint32_t)TaskSteps::StepCount); - - Stopwatch WriteTimer; - - FilteredRate FilteredDownloadedBytesPerSecond; - FilteredRate FilteredWrittenBytesPerSecond; - - std::unique_ptr<OperationLogOutput::ProgressBar> WriteProgressBarPtr( - m_LogOutput.CreateProgressBar(m_Options.PrimeCacheOnly ? "Downloading" : "Writing")); - OperationLogOutput::ProgressBar& WriteProgressBar(*WriteProgressBarPtr); - ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + ChunkBlockAnalyser BlockAnalyser( + m_LogOutput, + m_BlockDescriptions, + ChunkBlockAnalyser::Options{.IsQuiet = m_Options.IsQuiet, + .IsVerbose = m_Options.IsVerbose, + .HostLatencySec = m_Storage.BuildStorageHost.LatencySec, + .HostHighSpeedLatencySec = m_Storage.CacheHost.LatencySec, + .HostMaxRangeCountPerRequest = m_Storage.BuildStorageHost.Caps.MaxRangeCountPerRequest, + .HostHighSpeedMaxRangeCountPerRequest = m_Storage.CacheHost.Caps.MaxRangeCountPerRequest}); - struct LooseChunkHashWorkData - { - std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs; - uint32_t RemoteChunkIndex = (uint32_t)-1; - }; + std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = BlockAnalyser.GetNeeded( + m_RemoteLookup.ChunkHashToChunkIndex, + [&](uint32_t RemoteChunkIndex) -> bool { return RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex]; }); - std::vector<LooseChunkHashWorkData> LooseChunkHashWorks; - TotalPartWriteCount += CopyChunkDatas.size(); - TotalPartWriteCount += ScavengedSequenceCopyOperations.size(); + std::vector<uint32_t> FetchBlockIndexes; + std::vector<uint32_t> CachedChunkBlockIndexes; - for (const IoHash ChunkHash : m_LooseChunkHashes) { - auto RemoteChunkIndexIt = m_RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); - ZEN_ASSERT(RemoteChunkIndexIt != m_RemoteLookup.ChunkHashToChunkIndex.end()); - const uint32_t RemoteChunkIndex = RemoteChunkIndexIt->second; - if 
(RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex]) + ZEN_TRACE_CPU("BlockCacheFileExists"); + for (const ChunkBlockAnalyser::NeededBlock& NeededBlock : NeededBlocks) { - if (m_Options.IsVerbose) + if (m_Options.PrimeCacheOnly) { - ZEN_OPERATION_LOG_INFO(m_LogOutput, "Skipping chunk {} due to cache reuse", ChunkHash); - } - continue; - } - bool NeedsCopy = true; - if (RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex].compare_exchange_strong(NeedsCopy, false)) - { - std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs = - GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, RemoteChunkIndex); - - if (ChunkTargetPtrs.empty()) - { - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, "Skipping chunk {} due to cache reuse", ChunkHash); - } + FetchBlockIndexes.push_back(NeededBlock.BlockIndex); } else { - TotalRequestCount++; - TotalPartWriteCount++; - LooseChunkHashWorks.push_back( - LooseChunkHashWorkData{.ChunkTargetPtrs = ChunkTargetPtrs, .RemoteChunkIndex = RemoteChunkIndex}); - } - } - } - - uint32_t BlockCount = gsl::narrow<uint32_t>(m_BlockDescriptions.size()); - - std::vector<bool> ChunkIsPickedUpByBlock(m_RemoteContent.ChunkedContent.ChunkHashes.size(), false); - auto GetNeededChunkBlockIndexes = [this, &RemoteChunkIndexNeedsCopyFromSourceFlags, &ChunkIsPickedUpByBlock]( - const ChunkBlockDescription& BlockDescription) { - ZEN_TRACE_CPU("GetNeededChunkBlockIndexes"); - std::vector<uint32_t> NeededBlockChunkIndexes; - for (uint32_t ChunkBlockIndex = 0; ChunkBlockIndex < BlockDescription.ChunkRawHashes.size(); ChunkBlockIndex++) - { - const IoHash& ChunkHash = BlockDescription.ChunkRawHashes[ChunkBlockIndex]; - if (auto It = m_RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); It != m_RemoteLookup.ChunkHashToChunkIndex.end()) - { - const uint32_t RemoteChunkIndex = It->second; - if (!ChunkIsPickedUpByBlock[RemoteChunkIndex]) + const ChunkBlockDescription& BlockDescription = 
m_BlockDescriptions[NeededBlock.BlockIndex]; + bool UsingCachedBlock = false; + if (auto It = CachedBlocksFound.find(BlockDescription.BlockHash); It != CachedBlocksFound.end()) { - if (RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex]) + TotalPartWriteCount++; + + std::filesystem::path BlockPath = m_TempBlockFolderPath / BlockDescription.BlockHash.ToHexString(); + if (IsFile(BlockPath)) { - ChunkIsPickedUpByBlock[RemoteChunkIndex] = true; - NeededBlockChunkIndexes.push_back(ChunkBlockIndex); + CachedChunkBlockIndexes.push_back(NeededBlock.BlockIndex); + UsingCachedBlock = true; } } - } - else - { - ZEN_DEBUG("Chunk {} not found in block {}", ChunkHash, BlockDescription.BlockHash); + if (!UsingCachedBlock) + { + FetchBlockIndexes.push_back(NeededBlock.BlockIndex); + } } } - return NeededBlockChunkIndexes; - }; + } - std::vector<uint32_t> CachedChunkBlockIndexes; - std::vector<uint32_t> FetchBlockIndexes; - std::vector<std::vector<uint32_t>> AllBlockChunkIndexNeeded; + std::vector<uint32_t> NeededLooseChunkIndexes; - for (uint32_t BlockIndex = 0; BlockIndex < BlockCount; BlockIndex++) { - const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex]; - - std::vector<uint32_t> BlockChunkIndexNeeded = GetNeededChunkBlockIndexes(BlockDescription); - if (!BlockChunkIndexNeeded.empty()) + NeededLooseChunkIndexes.reserve(m_LooseChunkHashes.size()); + for (uint32_t LooseChunkIndex = 0; LooseChunkIndex < m_LooseChunkHashes.size(); LooseChunkIndex++) { - if (m_Options.PrimeCacheOnly) + const IoHash& ChunkHash = m_LooseChunkHashes[LooseChunkIndex]; + auto RemoteChunkIndexIt = m_RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); + ZEN_ASSERT(RemoteChunkIndexIt != m_RemoteLookup.ChunkHashToChunkIndex.end()); + const uint32_t RemoteChunkIndex = RemoteChunkIndexIt->second; + + if (RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex]) { - FetchBlockIndexes.push_back(BlockIndex); + if (m_Options.IsVerbose) + { + 
ZEN_OPERATION_LOG_INFO(m_LogOutput, + "Skipping chunk {} due to cache reuse", + m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]); + } + continue; } - else + + bool NeedsCopy = true; + if (RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex].compare_exchange_strong(NeedsCopy, false)) { - bool UsingCachedBlock = false; - if (auto It = CachedBlocksFound.find(BlockDescription.BlockHash); It != CachedBlocksFound.end()) + uint64_t WriteCount = GetChunkWriteCount(SequenceIndexChunksLeftToWriteCounters, RemoteChunkIndex); + if (WriteCount == 0) { - TotalPartWriteCount++; - - std::filesystem::path BlockPath = m_TempBlockFolderPath / BlockDescription.BlockHash.ToHexString(); - if (IsFile(BlockPath)) + if (m_Options.IsVerbose) { - CachedChunkBlockIndexes.push_back(BlockIndex); - UsingCachedBlock = true; + ZEN_OPERATION_LOG_INFO(m_LogOutput, + "Skipping chunk {} due to cache reuse", + m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]); } } - if (!UsingCachedBlock) + else { - FetchBlockIndexes.push_back(BlockIndex); + NeededLooseChunkIndexes.push_back(LooseChunkIndex); } } } - AllBlockChunkIndexNeeded.emplace_back(std::move(BlockChunkIndexNeeded)); } - BlobsExistsResult ExistsResult; - - if (m_Storage.BuildCacheStorage) + if (m_Storage.CacheStorage) { ZEN_TRACE_CPU("BlobCacheExistCheck"); Stopwatch Timer; - tsl::robin_set<IoHash> BlobHashesSet; + std::vector<IoHash> BlobHashes; + BlobHashes.reserve(NeededLooseChunkIndexes.size() + FetchBlockIndexes.size()); - BlobHashesSet.reserve(LooseChunkHashWorks.size() + FetchBlockIndexes.size()); - for (LooseChunkHashWorkData& LooseChunkHashWork : LooseChunkHashWorks) + for (const uint32_t LooseChunkIndex : NeededLooseChunkIndexes) { - BlobHashesSet.insert(m_RemoteContent.ChunkedContent.ChunkHashes[LooseChunkHashWork.RemoteChunkIndex]); + BlobHashes.push_back(m_LooseChunkHashes[LooseChunkIndex]); } + for (uint32_t BlockIndex : FetchBlockIndexes) { - const ChunkBlockDescription& BlockDescription = 
m_BlockDescriptions[BlockIndex]; - BlobHashesSet.insert(BlockDescription.BlockHash); + BlobHashes.push_back(m_BlockDescriptions[BlockIndex].BlockHash); } - if (!BlobHashesSet.empty()) - { - const std::vector<IoHash> BlobHashes(BlobHashesSet.begin(), BlobHashesSet.end()); - const std::vector<BuildStorageCache::BlobExistsResult> CacheExistsResult = - m_Storage.BuildCacheStorage->BlobsExists(m_BuildId, BlobHashes); + const std::vector<BuildStorageCache::BlobExistsResult> CacheExistsResult = + m_Storage.CacheStorage->BlobsExists(m_BuildId, BlobHashes); - if (CacheExistsResult.size() == BlobHashes.size()) + if (CacheExistsResult.size() == BlobHashes.size()) + { + ExistsResult.ExistingBlobs.reserve(CacheExistsResult.size()); + for (size_t BlobIndex = 0; BlobIndex < BlobHashes.size(); BlobIndex++) { - ExistsResult.ExistingBlobs.reserve(CacheExistsResult.size()); - for (size_t BlobIndex = 0; BlobIndex < BlobHashes.size(); BlobIndex++) + if (CacheExistsResult[BlobIndex].HasBody) { - if (CacheExistsResult[BlobIndex].HasBody) - { - ExistsResult.ExistingBlobs.insert(BlobHashes[BlobIndex]); - } + ExistsResult.ExistingBlobs.insert(BlobHashes[BlobIndex]); } } - ExistsResult.ElapsedTimeMs = Timer.GetElapsedTimeMs(); - if (!ExistsResult.ExistingBlobs.empty() && !m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Remote cache : Found {} out of {} needed blobs in {}", - ExistsResult.ExistingBlobs.size(), - BlobHashes.size(), - NiceTimeSpanMs(ExistsResult.ElapsedTimeMs)); - } + } + ExistsResult.ElapsedTimeMs = Timer.GetElapsedTimeMs(); + if (!ExistsResult.ExistingBlobs.empty() && !m_Options.IsQuiet) + { + ZEN_OPERATION_LOG_INFO(m_LogOutput, + "Remote cache : Found {} out of {} needed blobs in {}", + ExistsResult.ExistingBlobs.size(), + BlobHashes.size(), + NiceTimeSpanMs(ExistsResult.ElapsedTimeMs)); } } - std::vector<BlockRangeDescriptor> BlockRangeWorks; - std::vector<uint32_t> FullBlockWorks; + std::vector<ChunkBlockAnalyser::EPartialBlockDownloadMode> 
BlockPartialDownloadModes; + + if (m_Options.PartialBlockRequestMode == EPartialBlockRequestMode::Off) { - Stopwatch Timer; + BlockPartialDownloadModes.resize(m_BlockDescriptions.size(), ChunkBlockAnalyser::EPartialBlockDownloadMode::Off); + } + else + { + ChunkBlockAnalyser::EPartialBlockDownloadMode CloudPartialDownloadMode = ChunkBlockAnalyser::EPartialBlockDownloadMode::Off; + ChunkBlockAnalyser::EPartialBlockDownloadMode CachePartialDownloadMode = ChunkBlockAnalyser::EPartialBlockDownloadMode::Off; - std::vector<uint32_t> PartialBlockIndexes; + switch (m_Options.PartialBlockRequestMode) + { + case EPartialBlockRequestMode::Off: + break; + case EPartialBlockRequestMode::ZenCacheOnly: + CachePartialDownloadMode = m_Storage.CacheHost.Caps.MaxRangeCountPerRequest > 1 + ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed + : ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRange; + CloudPartialDownloadMode = ChunkBlockAnalyser::EPartialBlockDownloadMode::Off; + break; + case EPartialBlockRequestMode::Mixed: + CachePartialDownloadMode = m_Storage.CacheHost.Caps.MaxRangeCountPerRequest > 1 + ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed + : ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRange; + CloudPartialDownloadMode = ChunkBlockAnalyser::EPartialBlockDownloadMode::SingleRange; + break; + case EPartialBlockRequestMode::All: + CachePartialDownloadMode = m_Storage.CacheHost.Caps.MaxRangeCountPerRequest > 1 + ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed + : ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRange; + CloudPartialDownloadMode = m_Storage.BuildStorageHost.Caps.MaxRangeCountPerRequest > 1 + ? 
ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRange + : ChunkBlockAnalyser::EPartialBlockDownloadMode::SingleRange; + break; + default: + ZEN_ASSERT(false); + break; + } - for (uint32_t BlockIndex : FetchBlockIndexes) + BlockPartialDownloadModes.reserve(m_BlockDescriptions.size()); + for (uint32_t BlockIndex = 0; BlockIndex < m_BlockDescriptions.size(); BlockIndex++) { - const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex]; + const bool BlockExistInCache = ExistsResult.ExistingBlobs.contains(m_BlockDescriptions[BlockIndex].BlockHash); + BlockPartialDownloadModes.push_back(BlockExistInCache ? CachePartialDownloadMode : CloudPartialDownloadMode); + } + } - const std::vector<uint32_t> BlockChunkIndexNeeded = std::move(AllBlockChunkIndexNeeded[BlockIndex]); - if (!BlockChunkIndexNeeded.empty()) - { - bool WantsToDoPartialBlockDownload = BlockChunkIndexNeeded.size() < BlockDescription.ChunkRawHashes.size(); - bool CanDoPartialBlockDownload = - (BlockDescription.HeaderSize > 0) && - (BlockDescription.ChunkCompressedLengths.size() == BlockDescription.ChunkRawHashes.size()); - - bool AllowedToDoPartialRequest = false; - bool BlockExistInCache = ExistsResult.ExistingBlobs.contains(BlockDescription.BlockHash); - switch (m_Options.PartialBlockRequestMode) - { - case EPartialBlockRequestMode::Off: - break; - case EPartialBlockRequestMode::ZenCacheOnly: - AllowedToDoPartialRequest = BlockExistInCache; - break; - case EPartialBlockRequestMode::Mixed: - case EPartialBlockRequestMode::All: - AllowedToDoPartialRequest = true; - break; - default: - ZEN_ASSERT(false); - break; - } + ZEN_ASSERT(BlockPartialDownloadModes.size() == m_BlockDescriptions.size()); - const uint32_t ChunkStartOffsetInBlock = - gsl::narrow<uint32_t>(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize); + ChunkBlockAnalyser::BlockResult PartialBlocks = + BlockAnalyser.CalculatePartialBlockDownloads(NeededBlocks, BlockPartialDownloadModes); - const 
uint64_t TotalBlockSize = std::accumulate(BlockDescription.ChunkCompressedLengths.begin(), - BlockDescription.ChunkCompressedLengths.end(), - std::uint64_t(ChunkStartOffsetInBlock)); + struct LooseChunkHashWorkData + { + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs; + uint32_t RemoteChunkIndex = (uint32_t)-1; + }; - if (AllowedToDoPartialRequest && WantsToDoPartialBlockDownload && CanDoPartialBlockDownload) - { - ZEN_TRACE_CPU("PartialBlockAnalysis"); - - bool LimitToSingleRange = - BlockExistInCache ? false : m_Options.PartialBlockRequestMode == EPartialBlockRequestMode::Mixed; - uint64_t TotalWantedChunksSize = 0; - std::optional<std::vector<BlockRangeDescriptor>> MaybeBlockRanges = - CalculateBlockRanges(BlockIndex, - BlockDescription, - BlockChunkIndexNeeded, - LimitToSingleRange, - ChunkStartOffsetInBlock, - TotalBlockSize, - TotalWantedChunksSize); - ZEN_ASSERT(TotalWantedChunksSize <= TotalBlockSize); - - if (MaybeBlockRanges.has_value()) - { - const std::vector<BlockRangeDescriptor>& BlockRanges = MaybeBlockRanges.value(); - ZEN_ASSERT(!BlockRanges.empty()); - BlockRangeWorks.insert(BlockRangeWorks.end(), BlockRanges.begin(), BlockRanges.end()); - TotalRequestCount += BlockRanges.size(); - TotalPartWriteCount += BlockRanges.size(); - - uint64_t RequestedSize = std::accumulate( - BlockRanges.begin(), - BlockRanges.end(), - uint64_t(0), - [](uint64_t Current, const BlockRangeDescriptor& Range) { return Current + Range.RangeLength; }); - PartialBlockIndexes.push_back(BlockIndex); - - if (RequestedSize > TotalWantedChunksSize) - { - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO( - m_LogOutput, - "Requesting {} chunks ({}) from block {} ({}) using {} requests (extra bytes {})", - BlockChunkIndexNeeded.size(), - NiceBytes(RequestedSize), - BlockDescription.BlockHash, - NiceBytes(TotalBlockSize), - BlockRanges.size(), - NiceBytes(RequestedSize - TotalWantedChunksSize)); - } - } - } - else - { - 
FullBlockWorks.push_back(BlockIndex); - TotalRequestCount++; - TotalPartWriteCount++; - } - } - else - { - FullBlockWorks.push_back(BlockIndex); - TotalRequestCount++; - TotalPartWriteCount++; - } - } - } + TotalRequestCount += NeededLooseChunkIndexes.size(); + TotalPartWriteCount += NeededLooseChunkIndexes.size(); + TotalRequestCount += PartialBlocks.BlockRanges.size(); + TotalPartWriteCount += PartialBlocks.BlockRanges.size(); + TotalRequestCount += PartialBlocks.FullBlockIndexes.size(); + TotalPartWriteCount += PartialBlocks.FullBlockIndexes.size(); - if (!PartialBlockIndexes.empty()) - { - uint64_t TotalFullBlockRequestBytes = 0; - for (uint32_t BlockIndex : FullBlockWorks) - { - const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex]; - uint32_t CurrentOffset = - gsl::narrow<uint32_t>(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize); + std::vector<LooseChunkHashWorkData> LooseChunkHashWorks; + for (uint32_t LooseChunkIndex : NeededLooseChunkIndexes) + { + const IoHash& ChunkHash = m_LooseChunkHashes[LooseChunkIndex]; + auto RemoteChunkIndexIt = m_RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); + ZEN_ASSERT(RemoteChunkIndexIt != m_RemoteLookup.ChunkHashToChunkIndex.end()); + const uint32_t RemoteChunkIndex = RemoteChunkIndexIt->second; - TotalFullBlockRequestBytes += std::accumulate(BlockDescription.ChunkCompressedLengths.begin(), - BlockDescription.ChunkCompressedLengths.end(), - std::uint64_t(CurrentOffset)); - } + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs = + GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, RemoteChunkIndex); - uint64_t TotalPartialBlockBytes = 0; - for (uint32_t BlockIndex : PartialBlockIndexes) - { - const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex]; - uint32_t CurrentOffset = - gsl::narrow<uint32_t>(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize); + 
ZEN_ASSERT(!ChunkTargetPtrs.empty()); + LooseChunkHashWorks.push_back( + LooseChunkHashWorkData{.ChunkTargetPtrs = ChunkTargetPtrs, .RemoteChunkIndex = RemoteChunkIndex}); + } - TotalPartialBlockBytes += std::accumulate(BlockDescription.ChunkCompressedLengths.begin(), - BlockDescription.ChunkCompressedLengths.end(), - std::uint64_t(CurrentOffset)); - } + ZEN_TRACE_CPU("WriteChunks"); - uint64_t NonPartialTotalBlockBytes = TotalFullBlockRequestBytes + TotalPartialBlockBytes; + m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::WriteChunks, (uint32_t)TaskSteps::StepCount); - const uint64_t TotalPartialBlockRequestBytes = - std::accumulate(BlockRangeWorks.begin(), - BlockRangeWorks.end(), - uint64_t(0), - [](uint64_t Current, const BlockRangeDescriptor& Range) { return Current + Range.RangeLength; }); - uint64_t TotalExtraPartialBlocksRequests = BlockRangeWorks.size() - PartialBlockIndexes.size(); + Stopwatch WriteTimer; - uint64_t TotalSavedBlocksSize = TotalPartialBlockBytes - TotalPartialBlockRequestBytes; - double SavedSizePercent = (TotalSavedBlocksSize * 100.0) / NonPartialTotalBlockBytes; + FilteredRate FilteredDownloadedBytesPerSecond; + FilteredRate FilteredWrittenBytesPerSecond; - if (!m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Analysis of partial block requests saves download of {} out of {} ({:.1f}%) using {} extra " - "requests. Completed in {}", - NiceBytes(TotalSavedBlocksSize), - NiceBytes(NonPartialTotalBlockBytes), - SavedSizePercent, - TotalExtraPartialBlocksRequests, - NiceTimeSpanMs(ExistsResult.ElapsedTimeMs)); - } - } - } + std::unique_ptr<OperationLogOutput::ProgressBar> WriteProgressBarPtr( + m_LogOutput.CreateProgressBar(m_Options.PrimeCacheOnly ? 
"Downloading" : "Writing")); + OperationLogOutput::ProgressBar& WriteProgressBar(*WriteProgressBarPtr); + ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + + TotalPartWriteCount += CopyChunkDatas.size(); + TotalPartWriteCount += ScavengedSequenceCopyOperations.size(); BufferedWriteFileCache WriteCache; @@ -1472,13 +1345,23 @@ BuildsOperationUpdateFolder::Execute(FolderContent& OutLocalFolderState) }); } - for (size_t BlockRangeIndex = 0; BlockRangeIndex < BlockRangeWorks.size(); BlockRangeIndex++) + for (size_t BlockRangeIndex = 0; BlockRangeIndex < PartialBlocks.BlockRanges.size();) { ZEN_ASSERT(!m_Options.PrimeCacheOnly); if (m_AbortFlag) { break; } + + size_t RangeCount = 1; + size_t RangesLeft = PartialBlocks.BlockRanges.size() - BlockRangeIndex; + const ChunkBlockAnalyser::BlockRangeDescriptor& CurrentBlockRange = PartialBlocks.BlockRanges[BlockRangeIndex]; + while (RangeCount < RangesLeft && + CurrentBlockRange.BlockIndex == PartialBlocks.BlockRanges[BlockRangeIndex + RangeCount].BlockIndex) + { + RangeCount++; + } + Work.ScheduleWork( m_NetworkPool, [this, @@ -1492,18 +1375,19 @@ BuildsOperationUpdateFolder::Execute(FolderContent& OutLocalFolderState) TotalPartWriteCount, &FilteredWrittenBytesPerSecond, &Work, - &BlockRangeWorks, - BlockRangeIndex](std::atomic<bool>&) { + &PartialBlocks, + BlockRangeStartIndex = BlockRangeIndex, + RangeCount = RangeCount](std::atomic<bool>&) { if (!m_AbortFlag) { - ZEN_TRACE_CPU("Async_GetPartialBlock"); - - const BlockRangeDescriptor& BlockRange = BlockRangeWorks[BlockRangeIndex]; + ZEN_TRACE_CPU("Async_GetPartialBlockRanges"); FilteredDownloadedBytesPerSecond.Start(); DownloadPartialBlock( - BlockRange, + PartialBlocks.BlockRanges, + BlockRangeStartIndex, + RangeCount, ExistsResult, [this, &RemoteChunkIndexNeedsCopyFromSourceFlags, @@ -1515,7 +1399,10 @@ BuildsOperationUpdateFolder::Execute(FolderContent& OutLocalFolderState) TotalPartWriteCount, &FilteredDownloadedBytesPerSecond, 
&FilteredWrittenBytesPerSecond, - &BlockRange](IoBuffer&& InMemoryBuffer, const std::filesystem::path& OnDiskPath) { + &PartialBlocks](IoBuffer&& InMemoryBuffer, + const std::filesystem::path& OnDiskPath, + size_t BlockRangeStartIndex, + std::span<const std::pair<uint64_t, uint64_t>> OffsetAndLengths) { if (m_DownloadStats.RequestsCompleteCount == TotalRequestCount) { FilteredDownloadedBytesPerSecond.Stop(); @@ -1533,14 +1420,18 @@ BuildsOperationUpdateFolder::Execute(FolderContent& OutLocalFolderState) &Work, TotalPartWriteCount, &FilteredWrittenBytesPerSecond, - &BlockRange, + &PartialBlocks, + BlockRangeStartIndex, BlockChunkPath = std::filesystem::path(OnDiskPath), - BlockPartialBuffer = std::move(InMemoryBuffer)](std::atomic<bool>&) mutable { + BlockPartialBuffer = std::move(InMemoryBuffer), + OffsetAndLengths = std::vector<std::pair<uint64_t, uint64_t>>(OffsetAndLengths.begin(), + OffsetAndLengths.end())]( + std::atomic<bool>&) mutable { if (!m_AbortFlag) { ZEN_TRACE_CPU("Async_WritePartialBlock"); - const uint32_t BlockIndex = BlockRange.BlockIndex; + const uint32_t BlockIndex = PartialBlocks.BlockRanges[BlockRangeStartIndex].BlockIndex; const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex]; @@ -1563,22 +1454,41 @@ BuildsOperationUpdateFolder::Execute(FolderContent& OutLocalFolderState) FilteredWrittenBytesPerSecond.Start(); - if (!WritePartialBlockChunksToCache( - BlockDescription, - SequenceIndexChunksLeftToWriteCounters, - Work, - CompositeBuffer(std::move(BlockPartialBuffer)), - BlockRange.ChunkBlockIndexStart, - BlockRange.ChunkBlockIndexStart + BlockRange.ChunkBlockIndexCount - 1, - RemoteChunkIndexNeedsCopyFromSourceFlags, - WriteCache)) + size_t RangeCount = OffsetAndLengths.size(); + + for (size_t PartialRangeIndex = 0; PartialRangeIndex < RangeCount; PartialRangeIndex++) { - std::error_code DummyEc; - RemoveFile(BlockChunkPath, DummyEc); - throw std::runtime_error( - fmt::format("Partial block {} is malformed", 
BlockDescription.BlockHash)); - } + const std::pair<uint64_t, uint64_t>& OffsetAndLength = + OffsetAndLengths[PartialRangeIndex]; + IoBuffer BlockRangeBuffer(BlockPartialBuffer, + OffsetAndLength.first, + OffsetAndLength.second); + + const ChunkBlockAnalyser::BlockRangeDescriptor& RangeDescriptor = + PartialBlocks.BlockRanges[BlockRangeStartIndex + PartialRangeIndex]; + + if (!WritePartialBlockChunksToCache(BlockDescription, + SequenceIndexChunksLeftToWriteCounters, + Work, + CompositeBuffer(std::move(BlockRangeBuffer)), + RangeDescriptor.ChunkBlockIndexStart, + RangeDescriptor.ChunkBlockIndexStart + + RangeDescriptor.ChunkBlockIndexCount - 1, + RemoteChunkIndexNeedsCopyFromSourceFlags, + WriteCache)) + { + std::error_code DummyEc; + RemoveFile(BlockChunkPath, DummyEc); + throw std::runtime_error( + fmt::format("Partial block {} is malformed", BlockDescription.BlockHash)); + } + WritePartsComplete++; + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } + } std::error_code Ec = TryRemoveFile(BlockChunkPath); if (Ec) { @@ -1588,12 +1498,6 @@ BuildsOperationUpdateFolder::Execute(FolderContent& OutLocalFolderState) Ec.value(), Ec.message()); } - - WritePartsComplete++; - if (WritePartsComplete == TotalPartWriteCount) - { - FilteredWrittenBytesPerSecond.Stop(); - } } }, OnDiskPath.empty() ? 
WorkerThreadPool::EMode::DisableBacklog @@ -1602,9 +1506,10 @@ BuildsOperationUpdateFolder::Execute(FolderContent& OutLocalFolderState) }); } }); + BlockRangeIndex += RangeCount; } - for (uint32_t BlockIndex : FullBlockWorks) + for (uint32_t BlockIndex : PartialBlocks.FullBlockIndexes) { if (m_AbortFlag) { @@ -1641,20 +1546,20 @@ BuildsOperationUpdateFolder::Execute(FolderContent& OutLocalFolderState) IoBuffer BlockBuffer; const bool ExistsInCache = - m_Storage.BuildCacheStorage && ExistsResult.ExistingBlobs.contains(BlockDescription.BlockHash); + m_Storage.CacheStorage && ExistsResult.ExistingBlobs.contains(BlockDescription.BlockHash); if (ExistsInCache) { - BlockBuffer = m_Storage.BuildCacheStorage->GetBuildBlob(m_BuildId, BlockDescription.BlockHash); + BlockBuffer = m_Storage.CacheStorage->GetBuildBlob(m_BuildId, BlockDescription.BlockHash); } if (!BlockBuffer) { BlockBuffer = m_Storage.BuildStorage->GetBuildBlob(m_BuildId, BlockDescription.BlockHash); - if (BlockBuffer && m_Storage.BuildCacheStorage && m_Options.PopulateCache) + if (BlockBuffer && m_Storage.CacheStorage && m_Options.PopulateCache) { - m_Storage.BuildCacheStorage->PutBuildBlob(m_BuildId, - BlockDescription.BlockHash, - ZenContentType::kCompressedBinary, - CompositeBuffer(SharedBuffer(BlockBuffer))); + m_Storage.CacheStorage->PutBuildBlob(m_BuildId, + BlockDescription.BlockHash, + ZenContentType::kCompressedBinary, + CompositeBuffer(SharedBuffer(BlockBuffer))); } } if (!BlockBuffer) @@ -3217,10 +3122,10 @@ BuildsOperationUpdateFolder::DownloadBuildBlob(uint32_t RemoteChunkInde const IoHash& ChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; // FilteredDownloadedBytesPerSecond.Start(); IoBuffer BuildBlob; - const bool ExistsInCache = m_Storage.BuildCacheStorage && ExistsResult.ExistingBlobs.contains(ChunkHash); + const bool ExistsInCache = m_Storage.CacheStorage && ExistsResult.ExistingBlobs.contains(ChunkHash); if (ExistsInCache) { - BuildBlob = 
m_Storage.BuildCacheStorage->GetBuildBlob(m_BuildId, ChunkHash); + BuildBlob = m_Storage.CacheStorage->GetBuildBlob(m_BuildId, ChunkHash); } if (BuildBlob) { @@ -3248,12 +3153,12 @@ BuildsOperationUpdateFolder::DownloadBuildBlob(uint32_t RemoteChunkInde m_DownloadStats.DownloadedChunkCount++; m_DownloadStats.RequestsCompleteCount++; - if (Payload && m_Storage.BuildCacheStorage && m_Options.PopulateCache) + if (Payload && m_Storage.CacheStorage && m_Options.PopulateCache) { - m_Storage.BuildCacheStorage->PutBuildBlob(m_BuildId, - ChunkHash, - ZenContentType::kCompressedBinary, - CompositeBuffer(SharedBuffer(Payload))); + m_Storage.CacheStorage->PutBuildBlob(m_BuildId, + ChunkHash, + ZenContentType::kCompressedBinary, + CompositeBuffer(SharedBuffer(Payload))); } OnDownloaded(std::move(Payload)); @@ -3262,12 +3167,12 @@ BuildsOperationUpdateFolder::DownloadBuildBlob(uint32_t RemoteChunkInde else { BuildBlob = m_Storage.BuildStorage->GetBuildBlob(m_BuildId, ChunkHash); - if (BuildBlob && m_Storage.BuildCacheStorage && m_Options.PopulateCache) + if (BuildBlob && m_Storage.CacheStorage && m_Options.PopulateCache) { - m_Storage.BuildCacheStorage->PutBuildBlob(m_BuildId, - ChunkHash, - ZenContentType::kCompressedBinary, - CompositeBuffer(SharedBuffer(BuildBlob))); + m_Storage.CacheStorage->PutBuildBlob(m_BuildId, + ChunkHash, + ZenContentType::kCompressedBinary, + CompositeBuffer(SharedBuffer(BuildBlob))); } if (!BuildBlob) { @@ -3289,347 +3194,241 @@ BuildsOperationUpdateFolder::DownloadBuildBlob(uint32_t RemoteChunkInde } } -BuildsOperationUpdateFolder::BlockRangeDescriptor -BuildsOperationUpdateFolder::MergeBlockRanges(std::span<const BlockRangeDescriptor> Ranges) +void +BuildsOperationUpdateFolder::DownloadPartialBlock( + std::span<const ChunkBlockAnalyser::BlockRangeDescriptor> BlockRanges, + size_t BlockRangeStartIndex, + size_t BlockRangeCount, + const BlobsExistsResult& ExistsResult, + std::function<void(IoBuffer&& InMemoryBuffer, + const std::filesystem::path& 
OnDiskPath, + size_t BlockRangeStartIndex, + std::span<const std::pair<uint64_t, uint64_t>> OffsetAndLengths)>&& OnDownloaded) { - ZEN_ASSERT(Ranges.size() > 1); - const BlockRangeDescriptor& First = Ranges.front(); - const BlockRangeDescriptor& Last = Ranges.back(); - - return BlockRangeDescriptor{.BlockIndex = First.BlockIndex, - .RangeStart = First.RangeStart, - .RangeLength = Last.RangeStart + Last.RangeLength - First.RangeStart, - .ChunkBlockIndexStart = First.ChunkBlockIndexStart, - .ChunkBlockIndexCount = Last.ChunkBlockIndexStart + Last.ChunkBlockIndexCount - First.ChunkBlockIndexStart}; -} + const uint32_t BlockIndex = BlockRanges[BlockRangeStartIndex].BlockIndex; -std::optional<std::vector<BuildsOperationUpdateFolder::BlockRangeDescriptor>> -BuildsOperationUpdateFolder::MakeOptionalBlockRangeVector(uint64_t TotalBlockSize, const BlockRangeDescriptor& Range) -{ - if (Range.RangeLength == TotalBlockSize) - { - return {}; - } - else - { - return std::vector<BlockRangeDescriptor>{Range}; - } -}; + const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex]; -const BuildsOperationUpdateFolder::BlockRangeLimit* -BuildsOperationUpdateFolder::GetBlockRangeLimitForRange(std::span<const BlockRangeLimit> Limits, - uint64_t TotalBlockSize, - std::span<const BlockRangeDescriptor> Ranges) -{ - if (Ranges.size() > 1) - { - const std::uint64_t WantedSize = - std::accumulate(Ranges.begin(), Ranges.end(), uint64_t(0), [](uint64_t Current, const BlockRangeDescriptor& Range) { - return Current + Range.RangeLength; - }); + auto ProcessDownload = [this]( + const ChunkBlockDescription& BlockDescription, + IoBuffer&& BlockRangeBuffer, + size_t BlockRangeStartIndex, + std::span<const std::pair<uint64_t, uint64_t>> BlockOffsetAndLengths, + const std::function<void(IoBuffer && InMemoryBuffer, + const std::filesystem::path& OnDiskPath, + size_t BlockRangeStartIndex, + std::span<const std::pair<uint64_t, uint64_t>> OffsetAndLengths)>& OnDownloaded) { + uint64_t 
BlockRangeBufferSize = BlockRangeBuffer.GetSize(); + m_DownloadStats.DownloadedBlockCount++; + m_DownloadStats.DownloadedBlockByteCount += BlockRangeBufferSize; + m_DownloadStats.RequestsCompleteCount += BlockOffsetAndLengths.size(); - const double RangeRequestedPercent = (WantedSize * 100.0) / TotalBlockSize; + std::filesystem::path BlockChunkPath; - for (const BlockRangeLimit& Limit : Limits) + // Check if the dowloaded block is file based and we can move it directly without rewriting it { - if (RangeRequestedPercent >= Limit.SizePercent && Ranges.size() > Limit.MaxRangeCount) + IoBufferFileReference FileRef; + if (BlockRangeBuffer.GetFileReference(FileRef) && (FileRef.FileChunkOffset == 0) && + (FileRef.FileChunkSize == BlockRangeBufferSize)) { - return &Limit; - } - } - } - return nullptr; -}; + ZEN_TRACE_CPU("MoveTempPartialBlock"); -std::vector<BuildsOperationUpdateFolder::BlockRangeDescriptor> -BuildsOperationUpdateFolder::CollapseBlockRanges(const uint64_t AlwaysAcceptableGap, std::span<const BlockRangeDescriptor> BlockRanges) -{ - ZEN_ASSERT(BlockRanges.size() > 1); - std::vector<BlockRangeDescriptor> CollapsedBlockRanges; + std::error_code Ec; + std::filesystem::path TempBlobPath = PathFromHandle(FileRef.FileHandle, Ec); + if (!Ec) + { + BlockRangeBuffer.SetDeleteOnClose(false); + BlockRangeBuffer = {}; - auto BlockRangesIt = BlockRanges.begin(); - CollapsedBlockRanges.push_back(*BlockRangesIt++); - for (; BlockRangesIt != BlockRanges.end(); BlockRangesIt++) - { - BlockRangeDescriptor& LastRange = CollapsedBlockRanges.back(); + IoHashStream RangeId; + for (const std::pair<uint64_t, uint64_t>& Range : BlockOffsetAndLengths) + { + RangeId.Append(&Range.first, sizeof(uint64_t)); + RangeId.Append(&Range.second, sizeof(uint64_t)); + } + + BlockChunkPath = m_TempBlockFolderPath / fmt::format("{}_{}", BlockDescription.BlockHash, RangeId.GetHash()); + RenameFile(TempBlobPath, BlockChunkPath, Ec); + if (Ec) + { + BlockChunkPath = std::filesystem::path{}; - const 
uint64_t BothRangeSize = BlockRangesIt->RangeLength + LastRange.RangeLength; + // Re-open the temp file again + BasicFile OpenTemp(TempBlobPath, BasicFile::Mode::kDelete); + BlockRangeBuffer = IoBuffer(IoBuffer::File, OpenTemp.Detach(), 0, BlockRangeBufferSize, true); + BlockRangeBuffer.SetDeleteOnClose(true); + } + } + } + } - const uint64_t Gap = BlockRangesIt->RangeStart - (LastRange.RangeStart + LastRange.RangeLength); - if (Gap <= Max(BothRangeSize / 16, AlwaysAcceptableGap)) + if (BlockChunkPath.empty() && (BlockRangeBufferSize > m_Options.MaximumInMemoryPayloadSize)) { - LastRange.ChunkBlockIndexCount = - (BlockRangesIt->ChunkBlockIndexStart + BlockRangesIt->ChunkBlockIndexCount) - LastRange.ChunkBlockIndexStart; - LastRange.RangeLength = (BlockRangesIt->RangeStart + BlockRangesIt->RangeLength) - LastRange.RangeStart; + ZEN_TRACE_CPU("WriteTempPartialBlock"); + + IoHashStream RangeId; + for (const std::pair<uint64_t, uint64_t>& Range : BlockOffsetAndLengths) + { + RangeId.Append(&Range.first, sizeof(uint64_t)); + RangeId.Append(&Range.second, sizeof(uint64_t)); + } + + // Could not be moved and rather large, lets store it on disk + BlockChunkPath = m_TempBlockFolderPath / fmt::format("{}_{}", BlockDescription.BlockHash, RangeId.GetHash()); + TemporaryFile::SafeWriteFile(BlockChunkPath, BlockRangeBuffer); + BlockRangeBuffer = {}; } - else + if (!m_AbortFlag) { - CollapsedBlockRanges.push_back(*BlockRangesIt); + OnDownloaded(std::move(BlockRangeBuffer), std::move(BlockChunkPath), BlockRangeStartIndex, BlockOffsetAndLengths); } - } - - return CollapsedBlockRanges; -}; + }; -uint64_t -BuildsOperationUpdateFolder::CalculateNextGap(std::span<const BlockRangeDescriptor> BlockRanges) -{ - ZEN_ASSERT(BlockRanges.size() > 1); - uint64_t AcceptableGap = (uint64_t)-1; - for (size_t RangeIndex = 0; RangeIndex < BlockRanges.size() - 1; RangeIndex++) + std::vector<std::pair<uint64_t, uint64_t>> Ranges; + Ranges.reserve(BlockRangeCount); + for (size_t BlockRangeIndex = 
BlockRangeStartIndex; BlockRangeIndex < BlockRangeStartIndex + BlockRangeCount; BlockRangeIndex++) { - const BlockRangeDescriptor& Range = BlockRanges[RangeIndex]; - const BlockRangeDescriptor& NextRange = BlockRanges[RangeIndex + 1]; - - const uint64_t Gap = NextRange.RangeStart - (Range.RangeStart + Range.RangeLength); - AcceptableGap = Min(Gap, AcceptableGap); + const ChunkBlockAnalyser::BlockRangeDescriptor& BlockRange = BlockRanges[BlockRangeIndex]; + Ranges.push_back(std::make_pair(BlockRange.RangeStart, BlockRange.RangeLength)); } - AcceptableGap = RoundUp(AcceptableGap, 16u * 1024u); - return AcceptableGap; -}; -std::optional<std::vector<BuildsOperationUpdateFolder::BlockRangeDescriptor>> -BuildsOperationUpdateFolder::CalculateBlockRanges(uint32_t BlockIndex, - const ChunkBlockDescription& BlockDescription, - std::span<const uint32_t> BlockChunkIndexNeeded, - bool LimitToSingleRange, - const uint64_t ChunkStartOffsetInBlock, - const uint64_t TotalBlockSize, - uint64_t& OutTotalWantedChunksSize) -{ - ZEN_TRACE_CPU("CalculateBlockRanges"); + const bool ExistsInCache = m_Storage.CacheStorage && ExistsResult.ExistingBlobs.contains(BlockDescription.BlockHash); - std::vector<BlockRangeDescriptor> BlockRanges; + size_t SubBlockRangeCount = BlockRangeCount; + size_t SubRangeCountComplete = 0; + std::span<const std::pair<uint64_t, uint64_t>> RangesSpan(Ranges); + while (SubRangeCountComplete < SubBlockRangeCount) { - uint64_t CurrentOffset = ChunkStartOffsetInBlock; - uint32_t ChunkBlockIndex = 0; - uint32_t NeedBlockChunkIndexOffset = 0; - BlockRangeDescriptor NextRange{.BlockIndex = BlockIndex}; - while (NeedBlockChunkIndexOffset < BlockChunkIndexNeeded.size() && ChunkBlockIndex < BlockDescription.ChunkRawHashes.size()) + if (m_AbortFlag) + { + break; + } + + // First try to get subrange from cache. + // If not successful, try to get the ranges from the build store and adapt SubRangeCount... 
+ + size_t SubRangeStartIndex = BlockRangeStartIndex + SubRangeCountComplete; + if (ExistsInCache) { - const uint32_t ChunkCompressedLength = BlockDescription.ChunkCompressedLengths[ChunkBlockIndex]; - if (ChunkBlockIndex < BlockChunkIndexNeeded[NeedBlockChunkIndexOffset]) + size_t SubRangeCount = Min(BlockRangeCount - SubRangeCountComplete, m_Storage.CacheHost.Caps.MaxRangeCountPerRequest); + + if (SubRangeCount == 1) { - if (NextRange.RangeLength > 0) + // Legacy single-range path, prefer that for max compatibility + + const std::pair<uint64_t, uint64_t> SubRange = RangesSpan[SubRangeCountComplete]; + IoBuffer PayloadBuffer = + m_Storage.CacheStorage->GetBuildBlob(m_BuildId, BlockDescription.BlockHash, SubRange.first, SubRange.second); + if (m_AbortFlag) { - BlockRanges.push_back(NextRange); - NextRange = {.BlockIndex = BlockIndex}; + break; } - ChunkBlockIndex++; - CurrentOffset += ChunkCompressedLength; - } - else if (ChunkBlockIndex == BlockChunkIndexNeeded[NeedBlockChunkIndexOffset]) - { - if (NextRange.RangeLength == 0) + if (PayloadBuffer) { - NextRange.RangeStart = CurrentOffset; - NextRange.ChunkBlockIndexStart = ChunkBlockIndex; + ProcessDownload(BlockDescription, + std::move(PayloadBuffer), + SubRangeStartIndex, + std::vector<std::pair<uint64_t, uint64_t>>{std::make_pair(0u, SubRange.second)}, + OnDownloaded); + SubRangeCountComplete += SubRangeCount; + continue; } - NextRange.RangeLength += ChunkCompressedLength; - NextRange.ChunkBlockIndexCount++; - ChunkBlockIndex++; - CurrentOffset += ChunkCompressedLength; - NeedBlockChunkIndexOffset++; } else { - ZEN_ASSERT(false); - } - } - if (NextRange.RangeLength > 0) - { - BlockRanges.push_back(NextRange); - } - } - ZEN_ASSERT(!BlockRanges.empty()); - - OutTotalWantedChunksSize = - std::accumulate(BlockRanges.begin(), BlockRanges.end(), uint64_t(0), [](uint64_t Current, const BlockRangeDescriptor& Range) { - return Current + Range.RangeLength; - }); + auto SubRanges = RangesSpan.subspan(SubRangeCountComplete, 
SubRangeCount); - double RangeWantedPercent = (OutTotalWantedChunksSize * 100.0) / TotalBlockSize; - - if (BlockRanges.size() == 1) - { - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Range request of {} ({:.2f}%) using single range from block {} ({}) as is", - NiceBytes(OutTotalWantedChunksSize), - RangeWantedPercent, - BlockDescription.BlockHash, - NiceBytes(TotalBlockSize)); + BuildStorageCache::BuildBlobRanges RangeBuffers = + m_Storage.CacheStorage->GetBuildBlobRanges(m_BuildId, BlockDescription.BlockHash, SubRanges); + if (m_AbortFlag) + { + break; + } + if (RangeBuffers.PayloadBuffer) + { + if (RangeBuffers.Ranges.empty()) + { + SubRangeCount = Ranges.size() - SubRangeCountComplete; + ProcessDownload(BlockDescription, + std::move(RangeBuffers.PayloadBuffer), + SubRangeStartIndex, + RangesSpan.subspan(SubRangeCountComplete, SubRangeCount), + OnDownloaded); + SubRangeCountComplete += SubRangeCount; + continue; + } + else if (RangeBuffers.Ranges.size() == SubRangeCount) + { + ProcessDownload(BlockDescription, + std::move(RangeBuffers.PayloadBuffer), + SubRangeStartIndex, + RangeBuffers.Ranges, + OnDownloaded); + SubRangeCountComplete += SubRangeCount; + continue; + } + } + } } - return BlockRanges; - } - if (LimitToSingleRange) - { - const BlockRangeDescriptor MergedRange = MergeBlockRanges(BlockRanges); - if (m_Options.IsVerbose) - { - const double RangeRequestedPercent = (MergedRange.RangeLength * 100.0) / TotalBlockSize; - const double WastedPercent = ((MergedRange.RangeLength - OutTotalWantedChunksSize) * 100.0) / MergedRange.RangeLength; + size_t SubRangeCount = Min(BlockRangeCount - SubRangeCountComplete, m_Storage.BuildStorageHost.Caps.MaxRangeCountPerRequest); - ZEN_OPERATION_LOG_INFO( - m_LogOutput, - "Range request of {} ({:.2f}%) using {} ranges from block {} ({}) limited to single block range {} ({:.2f}%) wasting " - "{:.2f}% ({})", - NiceBytes(OutTotalWantedChunksSize), - RangeWantedPercent, - BlockRanges.size(), - 
BlockDescription.BlockHash, - NiceBytes(TotalBlockSize), - NiceBytes(MergedRange.RangeLength), - RangeRequestedPercent, - WastedPercent, - NiceBytes(MergedRange.RangeLength - OutTotalWantedChunksSize)); - } - return MakeOptionalBlockRangeVector(TotalBlockSize, MergedRange); - } + auto SubRanges = RangesSpan.subspan(SubRangeCountComplete, SubRangeCount); - if (RangeWantedPercent > FullBlockRangePercentLimit) - { - const BlockRangeDescriptor MergedRange = MergeBlockRanges(BlockRanges); - if (m_Options.IsVerbose) + BuildStorageBase::BuildBlobRanges RangeBuffers = + m_Storage.BuildStorage->GetBuildBlobRanges(m_BuildId, BlockDescription.BlockHash, SubRanges); + if (m_AbortFlag) { - const double RangeRequestedPercent = (MergedRange.RangeLength * 100.0) / TotalBlockSize; - const double WastedPercent = ((MergedRange.RangeLength - OutTotalWantedChunksSize) * 100.0) / MergedRange.RangeLength; - - ZEN_OPERATION_LOG_INFO( - m_LogOutput, - "Range request of {} ({:.2f}%) using {} ranges from block {} ({}) exceeds {}%. 
Merged to single block range {} " - "({:.2f}%) wasting {:.2f}% ({})", - NiceBytes(OutTotalWantedChunksSize), - RangeWantedPercent, - BlockRanges.size(), - BlockDescription.BlockHash, - NiceBytes(TotalBlockSize), - FullBlockRangePercentLimit, - NiceBytes(MergedRange.RangeLength), - RangeRequestedPercent, - WastedPercent, - NiceBytes(MergedRange.RangeLength - OutTotalWantedChunksSize)); + break; } - return MakeOptionalBlockRangeVector(TotalBlockSize, MergedRange); - } - - std::vector<BlockRangeDescriptor> CollapsedBlockRanges = CollapseBlockRanges(16u * 1024u, BlockRanges); - while (GetBlockRangeLimitForRange(ForceMergeLimits, TotalBlockSize, CollapsedBlockRanges)) - { - CollapsedBlockRanges = CollapseBlockRanges(CalculateNextGap(CollapsedBlockRanges), CollapsedBlockRanges); - } - - const std::uint64_t WantedCollapsedSize = - std::accumulate(CollapsedBlockRanges.begin(), - CollapsedBlockRanges.end(), - uint64_t(0), - [](uint64_t Current, const BlockRangeDescriptor& Range) { return Current + Range.RangeLength; }); - - const double CollapsedRangeRequestedPercent = (WantedCollapsedSize * 100.0) / TotalBlockSize; - - if (m_Options.IsVerbose) - { - const double WastedPercent = ((WantedCollapsedSize - OutTotalWantedChunksSize) * 100.0) / WantedCollapsedSize; - - ZEN_OPERATION_LOG_INFO( - m_LogOutput, - "Range request of {} ({:.2f}%) using {} ranges from block {} ({}) collapsed to {} {:.2f}% using {} ranges wasting {:.2f}% " - "({})", - NiceBytes(OutTotalWantedChunksSize), - RangeWantedPercent, - BlockRanges.size(), - BlockDescription.BlockHash, - NiceBytes(TotalBlockSize), - NiceBytes(WantedCollapsedSize), - CollapsedRangeRequestedPercent, - CollapsedBlockRanges.size(), - WastedPercent, - NiceBytes(WantedCollapsedSize - OutTotalWantedChunksSize)); - } - return CollapsedBlockRanges; -} - -void -BuildsOperationUpdateFolder::DownloadPartialBlock( - const BlockRangeDescriptor BlockRange, - const BlobsExistsResult& ExistsResult, - std::function<void(IoBuffer&& InMemoryBuffer, 
const std::filesystem::path& OnDiskPath)>&& OnDownloaded) -{ - const uint32_t BlockIndex = BlockRange.BlockIndex; - - const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex]; - - IoBuffer BlockBuffer; - if (m_Storage.BuildCacheStorage && ExistsResult.ExistingBlobs.contains(BlockDescription.BlockHash)) - { - BlockBuffer = - m_Storage.BuildCacheStorage->GetBuildBlob(m_BuildId, BlockDescription.BlockHash, BlockRange.RangeStart, BlockRange.RangeLength); - } - if (!BlockBuffer) - { - BlockBuffer = - m_Storage.BuildStorage->GetBuildBlob(m_BuildId, BlockDescription.BlockHash, BlockRange.RangeStart, BlockRange.RangeLength); - } - if (!BlockBuffer) - { - throw std::runtime_error(fmt::format("Block {} is missing when fetching range {} -> {}", - BlockDescription.BlockHash, - BlockRange.RangeStart, - BlockRange.RangeStart + BlockRange.RangeLength)); - } - if (!m_AbortFlag) - { - uint64_t BlockSize = BlockBuffer.GetSize(); - m_DownloadStats.DownloadedBlockCount++; - m_DownloadStats.DownloadedBlockByteCount += BlockSize; - m_DownloadStats.RequestsCompleteCount++; - - std::filesystem::path BlockChunkPath; - - // Check if the dowloaded block is file based and we can move it directly without rewriting it + if (RangeBuffers.PayloadBuffer) { - IoBufferFileReference FileRef; - if (BlockBuffer.GetFileReference(FileRef) && (FileRef.FileChunkOffset == 0) && (FileRef.FileChunkSize == BlockSize)) + if (RangeBuffers.Ranges.empty()) { - ZEN_TRACE_CPU("MoveTempPartialBlock"); + // Jupiter will ignore the ranges and send the whole payload if it fetches the payload from S3 + // Upload to cache (if enabled) and use the whole payload for the remaining ranges - std::error_code Ec; - std::filesystem::path TempBlobPath = PathFromHandle(FileRef.FileHandle, Ec); - if (!Ec) + if (m_Storage.CacheStorage && m_Options.PopulateCache) { - BlockBuffer.SetDeleteOnClose(false); - BlockBuffer = {}; - BlockChunkPath = m_TempBlockFolderPath / - fmt::format("{}_{:x}_{:x}", 
BlockDescription.BlockHash, BlockRange.RangeStart, BlockRange.RangeLength); - RenameFile(TempBlobPath, BlockChunkPath, Ec); - if (Ec) + m_Storage.CacheStorage->PutBuildBlob(m_BuildId, + BlockDescription.BlockHash, + ZenContentType::kCompressedBinary, + CompositeBuffer(std::vector<IoBuffer>{RangeBuffers.PayloadBuffer})); + if (m_AbortFlag) { - BlockChunkPath = std::filesystem::path{}; - - // Re-open the temp file again - BasicFile OpenTemp(TempBlobPath, BasicFile::Mode::kDelete); - BlockBuffer = IoBuffer(IoBuffer::File, OpenTemp.Detach(), 0, BlockSize, true); - BlockBuffer.SetDeleteOnClose(true); + break; } } - } - } - if (BlockChunkPath.empty() && (BlockSize > m_Options.MaximumInMemoryPayloadSize)) - { - ZEN_TRACE_CPU("WriteTempPartialBlock"); - // Could not be moved and rather large, lets store it on disk - BlockChunkPath = m_TempBlockFolderPath / - fmt::format("{}_{:x}_{:x}", BlockDescription.BlockHash, BlockRange.RangeStart, BlockRange.RangeLength); - TemporaryFile::SafeWriteFile(BlockChunkPath, BlockBuffer); - BlockBuffer = {}; + SubRangeCount = Ranges.size() - SubRangeCountComplete; + ProcessDownload(BlockDescription, + std::move(RangeBuffers.PayloadBuffer), + SubRangeStartIndex, + RangesSpan.subspan(SubRangeCountComplete, SubRangeCount), + OnDownloaded); + } + else + { + if (RangeBuffers.Ranges.size() != SubRanges.size()) + { + throw std::runtime_error(fmt::format("Fetching {} ranges from {} resulted in {} ranges", + SubRanges.size(), + BlockDescription.BlockHash, + RangeBuffers.Ranges.size())); + } + ProcessDownload(BlockDescription, + std::move(RangeBuffers.PayloadBuffer), + SubRangeStartIndex, + RangeBuffers.Ranges, + OnDownloaded); + } } - if (!m_AbortFlag) + else { - OnDownloaded(std::move(BlockBuffer), std::move(BlockChunkPath)); + throw std::runtime_error(fmt::format("Block {} is missing when fetching {} ranges", BlockDescription.BlockHash, SubRangeCount)); } + + SubRangeCountComplete += SubRangeCount; } } @@ -4083,7 +3882,8 @@ 
BuildsOperationUpdateFolder::WriteSequenceChunkToCache(BufferedWriteFileCache::L } bool -BuildsOperationUpdateFolder::GetBlockWriteOps(std::span<const IoHash> ChunkRawHashes, +BuildsOperationUpdateFolder::GetBlockWriteOps(const IoHash& BlockRawHash, + std::span<const IoHash> ChunkRawHashes, std::span<const uint32_t> ChunkCompressedLengths, std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags, @@ -4115,9 +3915,34 @@ BuildsOperationUpdateFolder::GetBlockWriteOps(std::span<const IoHash> ChunkR uint64_t VerifyChunkSize; CompressedBuffer CompressedChunk = CompressedBuffer::FromCompressed(SharedBuffer::MakeView(ChunkMemoryView), VerifyChunkHash, VerifyChunkSize); - ZEN_ASSERT(CompressedChunk); - ZEN_ASSERT(VerifyChunkHash == ChunkHash); - ZEN_ASSERT(VerifyChunkSize == m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]); + if (!CompressedChunk) + { + throw std::runtime_error(fmt::format("Chunk {} at {}, size {} in block {} is not a valid compressed buffer", + ChunkHash, + OffsetInBlock, + ChunkCompressedSize, + BlockRawHash)); + } + if (VerifyChunkHash != ChunkHash) + { + throw std::runtime_error(fmt::format("Chunk {} at {}, size {} in block {} has a mismatching content hash {}", + ChunkHash, + OffsetInBlock, + ChunkCompressedSize, + BlockRawHash, + VerifyChunkHash)); + } + if (VerifyChunkSize != m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]) + { + throw std::runtime_error( + fmt::format("Chunk {} at {}, size {} in block {} has a mismatching raw size {}, expected {}", + ChunkHash, + OffsetInBlock, + ChunkCompressedSize, + BlockRawHash, + VerifyChunkSize, + m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex])); + } OodleCompressor ChunkCompressor; OodleCompressionLevel ChunkCompressionLevel; @@ -4138,7 +3963,18 @@ BuildsOperationUpdateFolder::GetBlockWriteOps(std::span<const IoHash> ChunkR { Decompressed = CompressedChunk.Decompress().AsIoBuffer(); } - 
ZEN_ASSERT(Decompressed.GetSize() == m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]); + + if (Decompressed.GetSize() != m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]) + { + throw std::runtime_error(fmt::format("Chunk {} at {}, size {} in block {} decompressed to size {}, expected {}", + ChunkHash, + OffsetInBlock, + ChunkCompressedSize, + BlockRawHash, + Decompressed.GetSize(), + m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex])); + } + ZEN_ASSERT_SLOW(ChunkHash == IoHash::HashBuffer(Decompressed)); for (const ChunkedContentLookup::ChunkSequenceLocation* Target : ChunkTargetPtrs) { @@ -4237,7 +4073,8 @@ BuildsOperationUpdateFolder::WriteChunksBlockToCache(const ChunkBlockDescription const std::vector<uint32_t> ChunkCompressedLengths = ReadChunkBlockHeader(BlockView.Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder()), HeaderSize); - if (GetBlockWriteOps(BlockDescription.ChunkRawHashes, + if (GetBlockWriteOps(BlockDescription.BlockHash, + BlockDescription.ChunkRawHashes, ChunkCompressedLengths, SequenceIndexChunksLeftToWriteCounters, RemoteChunkIndexNeedsCopyFromSourceFlags, @@ -4252,7 +4089,8 @@ BuildsOperationUpdateFolder::WriteChunksBlockToCache(const ChunkBlockDescription return false; } - if (GetBlockWriteOps(BlockDescription.ChunkRawHashes, + if (GetBlockWriteOps(BlockDescription.BlockHash, + BlockDescription.ChunkRawHashes, BlockDescription.ChunkCompressedLengths, SequenceIndexChunksLeftToWriteCounters, RemoteChunkIndexNeedsCopyFromSourceFlags, @@ -4283,7 +4121,8 @@ BuildsOperationUpdateFolder::WritePartialBlockChunksToCache(const ChunkBlockDesc const MemoryView BlockView = BlockMemoryBuffer.GetView(); BlockWriteOps Ops; - if (GetBlockWriteOps(BlockDescription.ChunkRawHashes, + if (GetBlockWriteOps(BlockDescription.BlockHash, + BlockDescription.ChunkRawHashes, BlockDescription.ChunkCompressedLengths, SequenceIndexChunksLeftToWriteCounters, RemoteChunkIndexNeedsCopyFromSourceFlags, @@ -5156,12 +4995,12 @@ 
BuildsOperationUploadFolder::GenerateBuildBlocks(const ChunkedFolderContent& const IoHash& BlockHash = OutBlocks.BlockDescriptions[BlockIndex].BlockHash; const uint64_t CompressedBlockSize = Payload.GetCompressedSize(); - if (m_Storage.BuildCacheStorage && m_Options.PopulateCache) + if (m_Storage.CacheStorage && m_Options.PopulateCache) { - m_Storage.BuildCacheStorage->PutBuildBlob(m_BuildId, - BlockHash, - ZenContentType::kCompressedBinary, - Payload.GetCompressed()); + m_Storage.CacheStorage->PutBuildBlob(m_BuildId, + BlockHash, + ZenContentType::kCompressedBinary, + Payload.GetCompressed()); } m_Storage.BuildStorage->PutBuildBlob(m_BuildId, @@ -5179,11 +5018,11 @@ BuildsOperationUploadFolder::GenerateBuildBlocks(const ChunkedFolderContent& OutBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size()); } - if (m_Storage.BuildCacheStorage && m_Options.PopulateCache) + if (m_Storage.CacheStorage && m_Options.PopulateCache) { - m_Storage.BuildCacheStorage->PutBlobMetadatas(m_BuildId, - std::vector<IoHash>({BlockHash}), - std::vector<CbObject>({BlockMetaData})); + m_Storage.CacheStorage->PutBlobMetadatas(m_BuildId, + std::vector<IoHash>({BlockHash}), + std::vector<CbObject>({BlockMetaData})); } bool MetadataSucceeded = @@ -5334,6 +5173,13 @@ BuildsOperationUploadFolder::FetchChunk(const ChunkedFolderContent& Content, ZEN_ASSERT(!ChunkLocations.empty()); CompositeBuffer Chunk = OpenFileCache.GetRange(ChunkLocations[0].SequenceIndex, ChunkLocations[0].Offset, Content.ChunkedContent.ChunkRawSizes[ChunkIndex]); + if (!Chunk) + { + throw std::runtime_error(fmt::format("Unable to read chunk at {}, size {} from '{}'", + ChunkLocations[0].Offset, + Content.ChunkedContent.ChunkRawSizes[ChunkIndex], + Content.Paths[Lookup.SequenceIndexFirstPathIndex[ChunkLocations[0].SequenceIndex]])); + } ZEN_ASSERT_SLOW(IoHash::HashBuffer(Chunk) == ChunkHash); return Chunk; }; @@ -5362,10 +5208,7 @@ BuildsOperationUploadFolder::GenerateBlock(const ChunkedFolderContent& Content, 
Content.ChunkedContent.ChunkHashes[ChunkIndex], [this, &Content, &Lookup, &OpenFileCache, ChunkIndex](const IoHash& ChunkHash) -> std::pair<uint64_t, CompressedBuffer> { CompositeBuffer Chunk = FetchChunk(Content, Lookup, ChunkHash, OpenFileCache); - if (!Chunk) - { - ZEN_ASSERT(false); - } + ZEN_ASSERT(Chunk); uint64_t RawSize = Chunk.GetSize(); const bool ShouldCompressChunk = RawSize >= m_Options.MinimumSizeForCompressInBlock && @@ -6023,11 +5866,11 @@ BuildsOperationUploadFolder::UploadBuildPart(ChunkingController& ChunkController { const CbObject BlockMetaData = BuildChunkBlockDescription(NewBlocks.BlockDescriptions[BlockIndex], NewBlocks.BlockMetaDatas[BlockIndex]); - if (m_Storage.BuildCacheStorage && m_Options.PopulateCache) + if (m_Storage.CacheStorage && m_Options.PopulateCache) { - m_Storage.BuildCacheStorage->PutBlobMetadatas(m_BuildId, - std::vector<IoHash>({BlockHash}), - std::vector<CbObject>({BlockMetaData})); + m_Storage.CacheStorage->PutBlobMetadatas(m_BuildId, + std::vector<IoHash>({BlockHash}), + std::vector<CbObject>({BlockMetaData})); } bool MetadataSucceeded = m_Storage.BuildStorage->PutBlockMetadata(m_BuildId, BlockHash, BlockMetaData); if (MetadataSucceeded) @@ -6221,9 +6064,9 @@ BuildsOperationUploadFolder::UploadPartBlobs(const ChunkedFolderContent& Co const CbObject BlockMetaData = BuildChunkBlockDescription(NewBlocks.BlockDescriptions[BlockIndex], NewBlocks.BlockMetaDatas[BlockIndex]); - if (m_Storage.BuildCacheStorage && m_Options.PopulateCache) + if (m_Storage.CacheStorage && m_Options.PopulateCache) { - m_Storage.BuildCacheStorage->PutBuildBlob(m_BuildId, BlockHash, ZenContentType::kCompressedBinary, Payload); + m_Storage.CacheStorage->PutBuildBlob(m_BuildId, BlockHash, ZenContentType::kCompressedBinary, Payload); } m_Storage.BuildStorage->PutBuildBlob(m_BuildId, BlockHash, ZenContentType::kCompressedBinary, Payload); if (m_Options.IsVerbose) @@ -6237,11 +6080,11 @@ BuildsOperationUploadFolder::UploadPartBlobs(const 
ChunkedFolderContent& Co UploadedBlockSize += PayloadSize; TempUploadStats.BlocksBytes += PayloadSize; - if (m_Storage.BuildCacheStorage && m_Options.PopulateCache) + if (m_Storage.CacheStorage && m_Options.PopulateCache) { - m_Storage.BuildCacheStorage->PutBlobMetadatas(m_BuildId, - std::vector<IoHash>({BlockHash}), - std::vector<CbObject>({BlockMetaData})); + m_Storage.CacheStorage->PutBlobMetadatas(m_BuildId, + std::vector<IoHash>({BlockHash}), + std::vector<CbObject>({BlockMetaData})); } bool MetadataSucceeded = m_Storage.BuildStorage->PutBlockMetadata(m_BuildId, BlockHash, BlockMetaData); if (MetadataSucceeded) @@ -6304,9 +6147,9 @@ BuildsOperationUploadFolder::UploadPartBlobs(const ChunkedFolderContent& Co const uint64_t PayloadSize = Payload.GetSize(); - if (m_Storage.BuildCacheStorage && m_Options.PopulateCache) + if (m_Storage.CacheStorage && m_Options.PopulateCache) { - m_Storage.BuildCacheStorage->PutBuildBlob(m_BuildId, RawHash, ZenContentType::kCompressedBinary, Payload); + m_Storage.CacheStorage->PutBuildBlob(m_BuildId, RawHash, ZenContentType::kCompressedBinary, Payload); } if (PayloadSize >= LargeAttachmentSize) @@ -7050,14 +6893,14 @@ BuildsOperationPrimeCache::Execute() std::vector<IoHash> BlobsToDownload; BlobsToDownload.reserve(BuildBlobs.size()); - if (m_Storage.BuildCacheStorage && !BuildBlobs.empty() && !m_Options.ForceUpload) + if (m_Storage.CacheStorage && !BuildBlobs.empty() && !m_Options.ForceUpload) { ZEN_TRACE_CPU("BlobCacheExistCheck"); Stopwatch Timer; const std::vector<IoHash> BlobHashes(BuildBlobs.begin(), BuildBlobs.end()); const std::vector<BuildStorageCache::BlobExistsResult> CacheExistsResult = - m_Storage.BuildCacheStorage->BlobsExists(m_BuildId, BlobHashes); + m_Storage.CacheStorage->BlobsExists(m_BuildId, BlobHashes); if (CacheExistsResult.size() == BlobHashes.size()) { @@ -7104,33 +6947,33 @@ BuildsOperationPrimeCache::Execute() for (size_t BlobIndex = 0; BlobIndex < BlobCount; BlobIndex++) { - Work.ScheduleWork( - 
m_NetworkPool, - [this, - &Work, - &BlobsToDownload, - BlobCount, - &LooseChunkRawSizes, - &CompletedDownloadCount, - &FilteredDownloadedBytesPerSecond, - &MultipartAttachmentCount, - BlobIndex](std::atomic<bool>&) { - if (!m_AbortFlag) - { - const IoHash& BlobHash = BlobsToDownload[BlobIndex]; + Work.ScheduleWork(m_NetworkPool, + [this, + &Work, + &BlobsToDownload, + BlobCount, + &LooseChunkRawSizes, + &CompletedDownloadCount, + &FilteredDownloadedBytesPerSecond, + &MultipartAttachmentCount, + BlobIndex](std::atomic<bool>&) { + if (!m_AbortFlag) + { + const IoHash& BlobHash = BlobsToDownload[BlobIndex]; - bool IsLargeBlob = false; + bool IsLargeBlob = false; - if (auto It = LooseChunkRawSizes.find(BlobHash); It != LooseChunkRawSizes.end()) - { - IsLargeBlob = It->second >= m_Options.LargeAttachmentSize; - } + if (auto It = LooseChunkRawSizes.find(BlobHash); It != LooseChunkRawSizes.end()) + { + IsLargeBlob = It->second >= m_Options.LargeAttachmentSize; + } - FilteredDownloadedBytesPerSecond.Start(); + FilteredDownloadedBytesPerSecond.Start(); - if (IsLargeBlob) - { - DownloadLargeBlob(*m_Storage.BuildStorage, + if (IsLargeBlob) + { + DownloadLargeBlob( + *m_Storage.BuildStorage, m_TempPath, m_BuildId, BlobHash, @@ -7146,12 +6989,12 @@ BuildsOperationPrimeCache::Execute() if (!m_AbortFlag) { - if (Payload && m_Storage.BuildCacheStorage) + if (Payload && m_Storage.CacheStorage) { - m_Storage.BuildCacheStorage->PutBuildBlob(m_BuildId, - BlobHash, - ZenContentType::kCompressedBinary, - CompositeBuffer(SharedBuffer(Payload))); + m_Storage.CacheStorage->PutBuildBlob(m_BuildId, + BlobHash, + ZenContentType::kCompressedBinary, + CompositeBuffer(SharedBuffer(Payload))); } } CompletedDownloadCount++; @@ -7160,32 +7003,32 @@ BuildsOperationPrimeCache::Execute() FilteredDownloadedBytesPerSecond.Stop(); } }); - } - else - { - IoBuffer Payload = m_Storage.BuildStorage->GetBuildBlob(m_BuildId, BlobHash); - m_DownloadStats.DownloadedBlockCount++; - 
m_DownloadStats.DownloadedBlockByteCount += Payload.GetSize(); - m_DownloadStats.RequestsCompleteCount++; + } + else + { + IoBuffer Payload = m_Storage.BuildStorage->GetBuildBlob(m_BuildId, BlobHash); + m_DownloadStats.DownloadedBlockCount++; + m_DownloadStats.DownloadedBlockByteCount += Payload.GetSize(); + m_DownloadStats.RequestsCompleteCount++; - if (!m_AbortFlag) - { - if (Payload && m_Storage.BuildCacheStorage) - { - m_Storage.BuildCacheStorage->PutBuildBlob(m_BuildId, - BlobHash, - ZenContentType::kCompressedBinary, - CompositeBuffer(SharedBuffer(std::move(Payload)))); - } - } - CompletedDownloadCount++; - if (CompletedDownloadCount == BlobCount) - { - FilteredDownloadedBytesPerSecond.Stop(); - } - } - } - }); + if (!m_AbortFlag) + { + if (Payload && m_Storage.CacheStorage) + { + m_Storage.CacheStorage->PutBuildBlob(m_BuildId, + BlobHash, + ZenContentType::kCompressedBinary, + CompositeBuffer(SharedBuffer(std::move(Payload)))); + } + } + CompletedDownloadCount++; + if (CompletedDownloadCount == BlobCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + } + } + }); } Work.Wait(m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { @@ -7197,10 +7040,10 @@ BuildsOperationPrimeCache::Execute() std::string DownloadRateString = (CompletedDownloadCount == BlobCount) ? "" : fmt::format(" {}bits/s", NiceNum(FilteredDownloadedBytesPerSecond.GetCurrent() * 8)); - std::string UploadDetails = m_Storage.BuildCacheStorage ? fmt::format(" {} ({}) uploaded.", - m_StorageCacheStats.PutBlobCount.load(), - NiceBytes(m_StorageCacheStats.PutBlobByteCount.load())) - : ""; + std::string UploadDetails = m_Storage.CacheStorage ? 
fmt::format(" {} ({}) uploaded.", + m_StorageCacheStats.PutBlobCount.load(), + NiceBytes(m_StorageCacheStats.PutBlobByteCount.load())) + : ""; std::string Details = fmt::format("{}/{} ({}{}) downloaded.{}", CompletedDownloadCount.load(), @@ -7225,13 +7068,13 @@ BuildsOperationPrimeCache::Execute() return; } - if (m_Storage.BuildCacheStorage) + if (m_Storage.CacheStorage) { - m_Storage.BuildCacheStorage->Flush(m_LogOutput.GetProgressUpdateDelayMS(), [this](intptr_t Remaining) -> bool { + m_Storage.CacheStorage->Flush(m_LogOutput.GetProgressUpdateDelayMS(), [this](intptr_t Remaining) -> bool { ZEN_UNUSED(Remaining); if (!m_Options.IsQuiet) { - ZEN_OPERATION_LOG_INFO(m_LogOutput, "Waiting for {} blobs to finish upload to '{}'", Remaining, m_Storage.CacheName); + ZEN_OPERATION_LOG_INFO(m_LogOutput, "Waiting for {} blobs to finish upload to '{}'", Remaining, m_Storage.CacheHost.Name); } return !m_AbortFlag; }); @@ -7431,16 +7274,31 @@ GetRemoteContent(OperationLogOutput& Output, // TODO: GetBlockDescriptions for all BlockRawHashes in one go - check for local block descriptions when we cache them { + if (!IsQuiet) + { + ZEN_OPERATION_LOG_INFO(Output, "Fetching metadata for {} blocks", BlockRawHashes.size()); + } + + Stopwatch GetBlockMetadataTimer; + bool AttemptFallback = false; OutBlockDescriptions = GetBlockDescriptions(Output, *Storage.BuildStorage, - Storage.BuildCacheStorage.get(), + Storage.CacheStorage.get(), BuildId, - BuildPartId, BlockRawHashes, AttemptFallback, IsQuiet, IsVerbose); + + if (!IsQuiet) + { + ZEN_OPERATION_LOG_INFO(Output, + "GetBlockMetadata for {} took {}. 
Found {} blocks", + BuildPartId, + NiceTimeSpanMs(GetBlockMetadataTimer.GetElapsedTimeMs()), + OutBlockDescriptions.size()); + } } CalculateLocalChunkOrders(AbsoluteChunkOrders, @@ -7989,6 +7847,8 @@ namespace buildstorageoperations_testutils { } // namespace buildstorageoperations_testutils +TEST_SUITE_BEGIN("remotestore.buildstorageoperations"); + TEST_CASE("buildstorageoperations.upload.folder") { using namespace buildstorageoperations_testutils; @@ -8176,106 +8036,270 @@ TEST_CASE("buildstorageoperations.memorychunkingcache") TEST_CASE("buildstorageoperations.upload.multipart") { - using namespace buildstorageoperations_testutils; + // Disabled since it relies on authentication and specific block being present in cloud storage + if (false) + { + using namespace buildstorageoperations_testutils; - FastRandom BaseRandom; + FastRandom BaseRandom; - const size_t FileCount = 11; + const size_t FileCount = 11; - const std::string Paths[FileCount] = {{"file_1"}, - {"file_2.exe"}, - {"file_3.txt"}, - {"dir_1/dir1_file_1.exe"}, - {"dir_1/dir1_file_2.pdb"}, - {"dir_1/dir1_file_3.txt"}, - {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"}, - {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"}, - {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"}, - {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"}, - {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}}; - const uint64_t Sizes[FileCount] = - {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u}; + const std::string Paths[FileCount] = {{"file_1"}, + {"file_2.exe"}, + {"file_3.txt"}, + {"dir_1/dir1_file_1.exe"}, + {"dir_1/dir1_file_2.pdb"}, + {"dir_1/dir1_file_3.txt"}, + {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"}, + {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"}, + {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"}, + {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"}, + {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}}; + const uint64_t Sizes[FileCount] = + {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 
1024u, 3u * 1024u}; - ScopedTemporaryDirectory SourceFolder; - TestState State(SourceFolder.Path()); - State.Initialize(); - State.CreateSourceData("source", Paths, Sizes); + ScopedTemporaryDirectory SourceFolder; + TestState State(SourceFolder.Path()); + State.Initialize(); + State.CreateSourceData("source", Paths, Sizes); - std::span<const std::string> ManifestFiles1(Paths); - ManifestFiles1 = ManifestFiles1.subspan(0, FileCount / 2); + std::span<const std::string> ManifestFiles1(Paths); + ManifestFiles1 = ManifestFiles1.subspan(0, FileCount / 2); - std::span<const uint64_t> ManifestSizes1(Sizes); - ManifestSizes1 = ManifestSizes1.subspan(0, FileCount / 2); + std::span<const uint64_t> ManifestSizes1(Sizes); + ManifestSizes1 = ManifestSizes1.subspan(0, FileCount / 2); - std::span<const std::string> ManifestFiles2(Paths); - ManifestFiles2 = ManifestFiles2.subspan(FileCount / 2 - 1); + std::span<const std::string> ManifestFiles2(Paths); + ManifestFiles2 = ManifestFiles2.subspan(FileCount / 2 - 1); - std::span<const uint64_t> ManifestSizes2(Sizes); - ManifestSizes2 = ManifestSizes2.subspan(FileCount / 2 - 1); + std::span<const uint64_t> ManifestSizes2(Sizes); + ManifestSizes2 = ManifestSizes2.subspan(FileCount / 2 - 1); - const Oid BuildPart1Id = Oid::NewOid(); - const std::string BuildPart1Name = "part1"; - const Oid BuildPart2Id = Oid::NewOid(); - const std::string BuildPart2Name = "part2"; - { - CbObjectWriter Writer; - Writer.BeginObject("parts"sv); + const Oid BuildPart1Id = Oid::NewOid(); + const std::string BuildPart1Name = "part1"; + const Oid BuildPart2Id = Oid::NewOid(); + const std::string BuildPart2Name = "part2"; { - Writer.BeginObject(BuildPart1Name); + CbObjectWriter Writer; + Writer.BeginObject("parts"sv); { - Writer.AddObjectId("partId"sv, BuildPart1Id); - Writer.BeginArray("files"sv); - for (const std::string& ManifestFile : ManifestFiles1) + Writer.BeginObject(BuildPart1Name); { - Writer.AddString(ManifestFile); + Writer.AddObjectId("partId"sv, 
BuildPart1Id); + Writer.BeginArray("files"sv); + for (const std::string& ManifestFile : ManifestFiles1) + { + Writer.AddString(ManifestFile); + } + Writer.EndArray(); // files + } + Writer.EndObject(); // part1 + + Writer.BeginObject(BuildPart2Name); + { + Writer.AddObjectId("partId"sv, BuildPart2Id); + Writer.BeginArray("files"sv); + for (const std::string& ManifestFile : ManifestFiles2) + { + Writer.AddString(ManifestFile); + } + Writer.EndArray(); // files } - Writer.EndArray(); // files + Writer.EndObject(); // part2 + } + Writer.EndObject(); // parts + + ExtendableStringBuilder<1024> Manifest; + CompactBinaryToJson(Writer.Save(), Manifest); + WriteFile(State.RootPath / "manifest.json", IoBuffer(IoBuffer::Wrap, Manifest.Data(), Manifest.Size())); + } + + const Oid BuildId = Oid::NewOid(); + + auto Result = State.Upload(BuildId, {}, {}, "source", State.RootPath / "manifest.json"); + + CHECK_EQ(Result.size(), 2u); + CHECK_EQ(Result[0].first, BuildPart1Id); + CHECK_EQ(Result[0].second, BuildPart1Name); + CHECK_EQ(Result[1].first, BuildPart2Id); + CHECK_EQ(Result[1].second, BuildPart2Name); + State.ValidateUpload(BuildId, Result); + + FolderContent DownloadContent = State.Download(BuildId, Oid::Zero, {}, "download", /* Append */ false); + State.ValidateDownload(Paths, Sizes, "source", "download", DownloadContent); + + FolderContent Part1DownloadContent = State.Download(BuildId, BuildPart1Id, {}, "download_part1", /* Append */ false); + State.ValidateDownload(ManifestFiles1, ManifestSizes1, "source", "download_part1", Part1DownloadContent); + + FolderContent Part2DownloadContent = State.Download(BuildId, Oid::Zero, BuildPart2Name, "download_part2", /* Append */ false); + State.ValidateDownload(ManifestFiles2, ManifestSizes2, "source", "download_part2", Part2DownloadContent); + + (void)State.Download(BuildId, BuildPart1Id, BuildPart1Name, "download_part1+2", /* Append */ false); + FolderContent Part1And2DownloadContent = State.Download(BuildId, BuildPart2Id, {}, 
"download_part1+2", /* Append */ true); + State.ValidateDownload(Paths, Sizes, "source", "download_part1+2", Part1And2DownloadContent); + } +} + +TEST_CASE("buildstorageoperations.partial.block.download" * doctest::skip(true)) +{ + const std::string OidcExecutableName = "OidcToken" ZEN_EXE_SUFFIX_LITERAL; + std::filesystem::path OidcTokenExePath = (GetRunningExecutablePath().parent_path() / OidcExecutableName).make_preferred(); + + HttpClientSettings ClientSettings{ + .LogCategory = "httpbuildsclient", + .AccessTokenProvider = + httpclientauth::CreateFromOidcTokenExecutable(OidcTokenExePath, "https://jupiter.devtools.epicgames.com", true, false, false), + .AssumeHttp2 = false, + .AllowResume = true, + .RetryCount = 0, + .Verbose = false}; + + HttpClient HttpClient("https://euc.jupiter.devtools.epicgames.com", ClientSettings); + + const std::string_view Namespace = "fortnite.oplog"; + const std::string_view Bucket = "fortnitegame.staged-build.fortnite-main.ps4-client"; + const Oid BuildId = Oid::FromHexString("09a76ea92ad301d4724fafad"); + + { + HttpClient::Response Response = HttpClient.Get(fmt::format("/api/v2/builds/{}/{}/{}", Namespace, Bucket, BuildId), + HttpClient::Accept(ZenContentType::kCbObject)); + CbValidateError ValidateResult = CbValidateError::None; + CbObject Object = ValidateAndReadCompactBinaryObject(IoBuffer(Response.ResponsePayload), ValidateResult); + REQUIRE(ValidateResult == CbValidateError::None); + } + + std::vector<ChunkBlockDescription> BlockDescriptions; + { + CbObjectWriter Request; + + Request.BeginArray("blocks"sv); + { + Request.AddHash(IoHash::FromHexString("7c353ed782675a5e8f968e61e51fc797ecdc2882")); + } + Request.EndArray(); + + IoBuffer Payload = Request.Save().GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + + HttpClient::Response BlockDescriptionsResponse = + HttpClient.Post(fmt::format("/api/v2/builds/{}/{}/{}/blocks/getBlockMetadata", Namespace, Bucket, BuildId), + Payload, + 
HttpClient::Accept(ZenContentType::kCbObject)); + REQUIRE(BlockDescriptionsResponse.IsSuccess()); + + CbValidateError ValidateResult = CbValidateError::None; + CbObject Object = ValidateAndReadCompactBinaryObject(IoBuffer(BlockDescriptionsResponse.ResponsePayload), ValidateResult); + REQUIRE(ValidateResult == CbValidateError::None); + + { + CbArrayView BlocksArray = Object["blocks"sv].AsArrayView(); + for (CbFieldView Block : BlocksArray) + { + ChunkBlockDescription Description = ParseChunkBlockDescription(Block.AsObjectView()); + BlockDescriptions.emplace_back(std::move(Description)); } - Writer.EndObject(); // part1 + } + } + + REQUIRE(!BlockDescriptions.empty()); - Writer.BeginObject(BuildPart2Name); + const IoHash BlockHash = BlockDescriptions.back().BlockHash; + + const ChunkBlockDescription& BlockDescription = BlockDescriptions.front(); + REQUIRE(!BlockDescription.ChunkRawHashes.empty()); + REQUIRE(!BlockDescription.ChunkCompressedLengths.empty()); + + std::vector<std::pair<uint64_t, uint64_t>> ChunkOffsetAndSizes; + uint64_t Offset = gsl::narrow<uint32_t>(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize); + + for (uint32_t ChunkCompressedSize : BlockDescription.ChunkCompressedLengths) + { + ChunkOffsetAndSizes.push_back(std::make_pair(Offset, ChunkCompressedSize)); + Offset += ChunkCompressedSize; + } + + ScopedTemporaryDirectory SourceFolder; + + auto Validate = [&](std::span<const uint32_t> ChunkIndexesToFetch) { + std::vector<std::pair<uint64_t, uint64_t>> Ranges; + for (uint32_t ChunkIndex : ChunkIndexesToFetch) + { + Ranges.push_back(ChunkOffsetAndSizes[ChunkIndex]); + } + + HttpClient::KeyValueMap Headers; + if (!Ranges.empty()) + { + ExtendableStringBuilder<512> SB; + for (const std::pair<uint64_t, uint64_t>& R : Ranges) { - Writer.AddObjectId("partId"sv, BuildPart2Id); - Writer.BeginArray("files"sv); - for (const std::string& ManifestFile : ManifestFiles2) + if (SB.Size() > 0) { - Writer.AddString(ManifestFile); + SB << 
", "; } - Writer.EndArray(); // files + SB << R.first << "-" << R.first + R.second - 1; } - Writer.EndObject(); // part2 + Headers.Entries.insert({"Range", fmt::format("bytes={}", SB.ToView())}); } - Writer.EndObject(); // parts - ExtendableStringBuilder<1024> Manifest; - CompactBinaryToJson(Writer.Save(), Manifest); - WriteFile(State.RootPath / "manifest.json", IoBuffer(IoBuffer::Wrap, Manifest.Data(), Manifest.Size())); - } + HttpClient::Response GetBlobRangesResponse = HttpClient.Download( + fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}?supportsRedirect=false", Namespace, Bucket, BuildId, BlockHash), + SourceFolder.Path(), + Headers); - const Oid BuildId = Oid::NewOid(); + REQUIRE(GetBlobRangesResponse.IsSuccess()); + [[maybe_unused]] MemoryView RangesMemoryView = GetBlobRangesResponse.ResponsePayload.GetView(); - auto Result = State.Upload(BuildId, {}, {}, "source", State.RootPath / "manifest.json"); + std::vector<std::pair<uint64_t, uint64_t>> PayloadRanges = GetBlobRangesResponse.GetRanges(Ranges); + if (PayloadRanges.empty()) + { + // We got the whole blob, use the ranges as is + PayloadRanges = Ranges; + } - CHECK_EQ(Result.size(), 2u); - CHECK_EQ(Result[0].first, BuildPart1Id); - CHECK_EQ(Result[0].second, BuildPart1Name); - CHECK_EQ(Result[1].first, BuildPart2Id); - CHECK_EQ(Result[1].second, BuildPart2Name); - State.ValidateUpload(BuildId, Result); + REQUIRE(PayloadRanges.size() == Ranges.size()); - FolderContent DownloadContent = State.Download(BuildId, Oid::Zero, {}, "download", /* Append */ false); - State.ValidateDownload(Paths, Sizes, "source", "download", DownloadContent); + for (uint32_t RangeIndex = 0; RangeIndex < PayloadRanges.size(); RangeIndex++) + { + const std::pair<uint64_t, uint64_t>& PayloadRange = PayloadRanges[RangeIndex]; + + CHECK_EQ(PayloadRange.second, Ranges[RangeIndex].second); - FolderContent Part1DownloadContent = State.Download(BuildId, BuildPart1Id, {}, "download_part1", /* Append */ false); - 
State.ValidateDownload(ManifestFiles1, ManifestSizes1, "source", "download_part1", Part1DownloadContent); + IoBuffer ChunkPayload(GetBlobRangesResponse.ResponsePayload, PayloadRange.first, PayloadRange.second); + IoHash RawHash; + uint64_t RawSize; + CompressedBuffer CompressedChunk = CompressedBuffer::FromCompressed(SharedBuffer(ChunkPayload), RawHash, RawSize); + CHECK(CompressedChunk); + CHECK_EQ(RawHash, BlockDescription.ChunkRawHashes[ChunkIndexesToFetch[RangeIndex]]); + CHECK_EQ(RawSize, BlockDescription.ChunkRawLengths[ChunkIndexesToFetch[RangeIndex]]); + } + }; - FolderContent Part2DownloadContent = State.Download(BuildId, Oid::Zero, BuildPart2Name, "download_part2", /* Append */ false); - State.ValidateDownload(ManifestFiles2, ManifestSizes2, "source", "download_part2", Part2DownloadContent); + { + // Single + std::vector<uint32_t> ChunkIndexesToFetch{uint32_t(BlockDescription.ChunkCompressedLengths.size() / 2)}; + Validate(ChunkIndexesToFetch); + } + { + // Many + std::vector<uint32_t> ChunkIndexesToFetch; + for (uint32_t Index = 0; Index < BlockDescription.ChunkCompressedLengths.size() / 16; Index++) + { + ChunkIndexesToFetch.push_back(uint32_t(BlockDescription.ChunkCompressedLengths.size() / 6 + Index * 7)); + ChunkIndexesToFetch.push_back(uint32_t(BlockDescription.ChunkCompressedLengths.size() / 6 + Index * 7 + 1)); + ChunkIndexesToFetch.push_back(uint32_t(BlockDescription.ChunkCompressedLengths.size() / 6 + Index * 7 + 3)); + } + Validate(ChunkIndexesToFetch); + } - (void)State.Download(BuildId, BuildPart1Id, BuildPart1Name, "download_part1+2", /* Append */ false); - FolderContent Part1And2DownloadContent = State.Download(BuildId, BuildPart2Id, {}, "download_part1+2", /* Append */ true); - State.ValidateDownload(Paths, Sizes, "source", "download_part1+2", Part1And2DownloadContent); + { + // First and last + std::vector<uint32_t> ChunkIndexesToFetch{0, uint32_t(BlockDescription.ChunkCompressedLengths.size() - 1)}; + Validate(ChunkIndexesToFetch); + } } 
+TEST_SUITE_END(); void buildstorageoperations_forcelink() diff --git a/src/zenremotestore/builds/buildstorageutil.cpp b/src/zenremotestore/builds/buildstorageutil.cpp index 36b45e800..2ae726e29 100644 --- a/src/zenremotestore/builds/buildstorageutil.cpp +++ b/src/zenremotestore/builds/buildstorageutil.cpp @@ -63,11 +63,15 @@ ResolveBuildStorage(OperationLogOutput& Output, std::string HostUrl; std::string HostName; + double HostLatencySec = -1.0; + uint64_t HostMaxRangeCountPerRequest = 1; std::string CacheUrl; std::string CacheName; - bool HostAssumeHttp2 = ClientSettings.AssumeHttp2; - bool CacheAssumeHttp2 = ClientSettings.AssumeHttp2; + bool HostAssumeHttp2 = ClientSettings.AssumeHttp2; + bool CacheAssumeHttp2 = ClientSettings.AssumeHttp2; + double CacheLatencySec = -1.0; + uint64_t CacheMaxRangeCountPerRequest = 1; JupiterServerDiscovery DiscoveryResponse; const std::string_view DiscoveryHost = Host.empty() ? OverrideHost : Host; @@ -98,8 +102,10 @@ ResolveBuildStorage(OperationLogOutput& Output, { ZEN_OPERATION_LOG_INFO(Output, "Server endpoint at '{}/api/v1/status/servers' succeeded", OverrideHost); } - HostUrl = OverrideHost; - HostName = GetHostNameFromUrl(OverrideHost); + HostUrl = OverrideHost; + HostName = GetHostNameFromUrl(OverrideHost); + HostLatencySec = TestResult.LatencySeconds; + HostMaxRangeCountPerRequest = TestResult.MaxRangeCountPerRequest; } else { @@ -134,9 +140,11 @@ ResolveBuildStorage(OperationLogOutput& Output, ZEN_OPERATION_LOG_INFO(Output, "Server endpoint at '{}/api/v1/status/servers' succeeded", ServerEndpoint.BaseUrl); } - HostUrl = ServerEndpoint.BaseUrl; - HostAssumeHttp2 = ServerEndpoint.AssumeHttp2; - HostName = ServerEndpoint.Name; + HostUrl = ServerEndpoint.BaseUrl; + HostAssumeHttp2 = ServerEndpoint.AssumeHttp2; + HostName = ServerEndpoint.Name; + HostLatencySec = TestResult.LatencySeconds; + HostMaxRangeCountPerRequest = TestResult.MaxRangeCountPerRequest; break; } else @@ -180,9 +188,11 @@ 
ResolveBuildStorage(OperationLogOutput& Output, ZEN_OPERATION_LOG_INFO(Output, "Cache endpoint at '{}/status/builds' succeeded", CacheEndpoint.BaseUrl); } - CacheUrl = CacheEndpoint.BaseUrl; - CacheAssumeHttp2 = CacheEndpoint.AssumeHttp2; - CacheName = CacheEndpoint.Name; + CacheUrl = CacheEndpoint.BaseUrl; + CacheAssumeHttp2 = CacheEndpoint.AssumeHttp2; + CacheName = CacheEndpoint.Name; + CacheLatencySec = TestResult.LatencySeconds; + CacheMaxRangeCountPerRequest = TestResult.MaxRangeCountPerRequest; break; } } @@ -204,6 +214,7 @@ ResolveBuildStorage(OperationLogOutput& Output, CacheUrl = ZenServerLocalHostUrl; CacheAssumeHttp2 = false; CacheName = "localhost"; + CacheLatencySec = TestResult.LatencySeconds; } } }); @@ -219,8 +230,10 @@ ResolveBuildStorage(OperationLogOutput& Output, if (ZenCacheEndpointTestResult TestResult = TestZenCacheEndpoint(ZenCacheHost, /*AssumeHttp2*/ false, ClientSettings.Verbose); TestResult.Success) { - CacheUrl = ZenCacheHost; - CacheName = GetHostNameFromUrl(ZenCacheHost); + CacheUrl = ZenCacheHost; + CacheName = GetHostNameFromUrl(ZenCacheHost); + CacheLatencySec = TestResult.LatencySeconds; + CacheMaxRangeCountPerRequest = TestResult.MaxRangeCountPerRequest; } else { @@ -228,13 +241,34 @@ ResolveBuildStorage(OperationLogOutput& Output, } } - return BuildStorageResolveResult{.HostUrl = HostUrl, - .HostName = HostName, - .HostAssumeHttp2 = HostAssumeHttp2, + return BuildStorageResolveResult{ + .Cloud = {.Address = HostUrl, + .Name = HostName, + .AssumeHttp2 = HostAssumeHttp2, + .LatencySec = HostLatencySec, + .Caps = BuildStorageResolveResult::Capabilities{.MaxRangeCountPerRequest = HostMaxRangeCountPerRequest}}, + .Cache = {.Address = CacheUrl, + .Name = CacheName, + .AssumeHttp2 = CacheAssumeHttp2, + .LatencySec = CacheLatencySec, + .Caps = BuildStorageResolveResult::Capabilities{.MaxRangeCountPerRequest = CacheMaxRangeCountPerRequest}}}; +} - .CacheUrl = CacheUrl, - .CacheName = CacheName, - .CacheAssumeHttp2 = CacheAssumeHttp2}; 
+std::vector<ChunkBlockDescription> +ParseBlockMetadatas(std::span<const CbObject> BlockMetadatas) +{ + std::vector<ChunkBlockDescription> UnorderedList; + UnorderedList.reserve(BlockMetadatas.size()); + for (size_t CacheBlockMetadataIndex = 0; CacheBlockMetadataIndex < BlockMetadatas.size(); CacheBlockMetadataIndex++) + { + const CbObject& CacheBlockMetadata = BlockMetadatas[CacheBlockMetadataIndex]; + ChunkBlockDescription Description = ParseChunkBlockDescription(CacheBlockMetadata); + if (Description.BlockHash != IoHash::Zero) + { + UnorderedList.emplace_back(std::move(Description)); + } + } + return UnorderedList; } std::vector<ChunkBlockDescription> @@ -242,7 +276,6 @@ GetBlockDescriptions(OperationLogOutput& Output, BuildStorageBase& Storage, BuildStorageCache* OptionalCacheStorage, const Oid& BuildId, - const Oid& BuildPartId, std::span<const IoHash> BlockRawHashes, bool AttemptFallback, bool IsQuiet, @@ -250,37 +283,20 @@ GetBlockDescriptions(OperationLogOutput& Output, { using namespace std::literals; - if (!IsQuiet) - { - ZEN_OPERATION_LOG_INFO(Output, "Fetching metadata for {} blocks", BlockRawHashes.size()); - } - - Stopwatch GetBlockMetadataTimer; - std::vector<ChunkBlockDescription> UnorderedList; tsl::robin_map<IoHash, size_t, IoHash::Hasher> BlockDescriptionLookup; if (OptionalCacheStorage && !BlockRawHashes.empty()) { std::vector<CbObject> CacheBlockMetadatas = OptionalCacheStorage->GetBlobMetadatas(BuildId, BlockRawHashes); - UnorderedList.reserve(CacheBlockMetadatas.size()); - for (size_t CacheBlockMetadataIndex = 0; CacheBlockMetadataIndex < CacheBlockMetadatas.size(); CacheBlockMetadataIndex++) + if (!CacheBlockMetadatas.empty()) { - const CbObject& CacheBlockMetadata = CacheBlockMetadatas[CacheBlockMetadataIndex]; - ChunkBlockDescription Description = ParseChunkBlockDescription(CacheBlockMetadata); - if (Description.BlockHash == IoHash::Zero) + UnorderedList = ParseBlockMetadatas(CacheBlockMetadatas); + for (size_t DescriptionIndex = 0; 
DescriptionIndex < UnorderedList.size(); DescriptionIndex++) { - ZEN_OPERATION_LOG_WARN(Output, "Unexpected/invalid block metadata received from remote cache, skipping block"); - } - else - { - UnorderedList.emplace_back(std::move(Description)); + const ChunkBlockDescription& Description = UnorderedList[DescriptionIndex]; + BlockDescriptionLookup.insert_or_assign(Description.BlockHash, DescriptionIndex); } } - for (size_t DescriptionIndex = 0; DescriptionIndex < UnorderedList.size(); DescriptionIndex++) - { - const ChunkBlockDescription& Description = UnorderedList[DescriptionIndex]; - BlockDescriptionLookup.insert_or_assign(Description.BlockHash, DescriptionIndex); - } } if (UnorderedList.size() < BlockRawHashes.size()) @@ -346,15 +362,6 @@ GetBlockDescriptions(OperationLogOutput& Output, } } - if (!IsQuiet) - { - ZEN_OPERATION_LOG_INFO(Output, - "GetBlockMetadata for {} took {}. Found {} blocks", - BuildPartId, - NiceTimeSpanMs(GetBlockMetadataTimer.GetElapsedTimeMs()), - Result.size()); - } - if (Result.size() != BlockRawHashes.size()) { std::string ErrorDescription = diff --git a/src/zenremotestore/builds/filebuildstorage.cpp b/src/zenremotestore/builds/filebuildstorage.cpp index 55e69de61..2f4904449 100644 --- a/src/zenremotestore/builds/filebuildstorage.cpp +++ b/src/zenremotestore/builds/filebuildstorage.cpp @@ -432,6 +432,45 @@ public: return IoBuffer{}; } + virtual BuildBlobRanges GetBuildBlobRanges(const Oid& BuildId, + const IoHash& RawHash, + std::span<const std::pair<uint64_t, uint64_t>> Ranges) override + { + ZEN_TRACE_CPU("FileBuildStorage::GetBuildBlobRanges"); + ZEN_UNUSED(BuildId); + ZEN_ASSERT(!Ranges.empty()); + + uint64_t ReceivedBytes = 0; + uint64_t SentBytes = Ranges.size() * 2 * 8; + + SimulateLatency(SentBytes, 0); + auto _ = MakeGuard([&]() { SimulateLatency(0, ReceivedBytes); }); + + Stopwatch ExecutionTimer; + auto __ = MakeGuard([&]() { AddStatistic(ExecutionTimer, SentBytes, ReceivedBytes); }); + + BuildBlobRanges Result; + + const 
std::filesystem::path BlockPath = GetBlobPayloadPath(RawHash); + if (IsFile(BlockPath)) + { + BasicFile File(BlockPath, BasicFile::Mode::kRead); + + uint64_t RangeOffset = Ranges.front().first; + uint64_t RangeBytes = Ranges.back().first + Ranges.back().second - RangeOffset; + Result.PayloadBuffer = IoBufferBuilder::MakeFromFileHandle(File.Detach(), RangeOffset, RangeBytes); + + Result.Ranges.reserve(Ranges.size()); + + for (const std::pair<uint64_t, uint64_t>& Range : Ranges) + { + Result.Ranges.push_back(std::make_pair(Range.first - RangeOffset, Range.second)); + } + ReceivedBytes = Result.PayloadBuffer.GetSize(); + } + return Result; + } + virtual std::vector<std::function<void()>> GetLargeBuildBlob(const Oid& BuildId, const IoHash& RawHash, uint64_t ChunkSize, diff --git a/src/zenremotestore/builds/jupiterbuildstorage.cpp b/src/zenremotestore/builds/jupiterbuildstorage.cpp index 23d0ddd4c..8e16da1a9 100644 --- a/src/zenremotestore/builds/jupiterbuildstorage.cpp +++ b/src/zenremotestore/builds/jupiterbuildstorage.cpp @@ -21,7 +21,7 @@ namespace zen { using namespace std::literals; namespace { - void ThrowFromJupiterResult(const JupiterResult& Result, std::string_view Prefix) + [[noreturn]] void ThrowFromJupiterResult(const JupiterResult& Result, std::string_view Prefix) { int Error = Result.ErrorCode < (int)HttpResponseCode::Continue ? 
Result.ErrorCode : 0; HttpResponseCode Status = @@ -295,6 +295,26 @@ public: return std::move(GetBuildBlobResult.Response); } + virtual BuildBlobRanges GetBuildBlobRanges(const Oid& BuildId, + const IoHash& RawHash, + std::span<const std::pair<uint64_t, uint64_t>> Ranges) override + { + ZEN_TRACE_CPU("Jupiter::GetBuildBlob"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + CreateDirectories(m_TempFolderPath); + + BuildBlobRangesResult GetBuildBlobResult = + m_Session.GetBuildBlob(m_Namespace, m_Bucket, BuildId, RawHash, m_TempFolderPath, Ranges); + AddStatistic(GetBuildBlobResult); + if (!GetBuildBlobResult.Success) + { + ThrowFromJupiterResult(GetBuildBlobResult, "Failed fetching build blob ranges"sv); + } + return BuildBlobRanges{.PayloadBuffer = std::move(GetBuildBlobResult.Response), .Ranges = std::move(GetBuildBlobResult.Ranges)}; + } + virtual std::vector<std::function<void()>> GetLargeBuildBlob(const Oid& BuildId, const IoHash& RawHash, uint64_t ChunkSize, diff --git a/src/zenremotestore/chunking/chunkblock.cpp b/src/zenremotestore/chunking/chunkblock.cpp index c4d8653f4..cca32c17d 100644 --- a/src/zenremotestore/chunking/chunkblock.cpp +++ b/src/zenremotestore/chunking/chunkblock.cpp @@ -7,27 +7,201 @@ #include <zencore/logging.h> #include <zencore/timer.h> #include <zencore/trace.h> - #include <zenremotestore/operationlogoutput.h> -#include <vector> +#include <numeric> ZEN_THIRD_PARTY_INCLUDES_START -#include <tsl/robin_map.h> +#include <tsl/robin_set.h> ZEN_THIRD_PARTY_INCLUDES_END #if ZEN_WITH_TESTS # include <zencore/testing.h> # include <zencore/testutils.h> - -# include <unordered_map> -# include <numeric> #endif // ZEN_WITH_TESTS namespace zen { using namespace std::literals; +namespace chunkblock_impl { + + struct RangeDescriptor + { + uint64_t RangeStart = 0; + uint64_t RangeLength = 0; + uint32_t ChunkBlockIndexStart = 0; + uint32_t ChunkBlockIndexCount = 0; + }; + 
+ void MergeCheapestRange(std::vector<RangeDescriptor>& InOutRanges) + { + ZEN_ASSERT(InOutRanges.size() > 1); + + size_t BestRangeIndexToCollapse = SIZE_MAX; + uint64_t BestGap = (uint64_t)-1; + + for (size_t RangeIndex = 0; RangeIndex < InOutRanges.size() - 1; RangeIndex++) + { + const RangeDescriptor& Range = InOutRanges[RangeIndex]; + const RangeDescriptor& NextRange = InOutRanges[RangeIndex + 1]; + uint64_t Gap = NextRange.RangeStart - (Range.RangeStart + Range.RangeLength); + if (Gap < BestGap) + { + BestRangeIndexToCollapse = RangeIndex; + BestGap = Gap; + } + else if (Gap == BestGap) + { + const RangeDescriptor& BestRange = InOutRanges[BestRangeIndexToCollapse]; + const RangeDescriptor& BestNextRange = InOutRanges[BestRangeIndexToCollapse + 1]; + uint64_t BestMergedSize = (BestNextRange.RangeStart + BestNextRange.RangeLength) - BestRange.RangeStart; + uint64_t MergedSize = (NextRange.RangeStart + NextRange.RangeLength) - Range.RangeStart; + if (MergedSize < BestMergedSize) + { + BestRangeIndexToCollapse = RangeIndex; + } + } + } + + ZEN_ASSERT(BestRangeIndexToCollapse != SIZE_MAX); + ZEN_ASSERT(BestRangeIndexToCollapse < InOutRanges.size() - 1); + ZEN_ASSERT(BestGap != (uint64_t)-1); + + RangeDescriptor& BestRange = InOutRanges[BestRangeIndexToCollapse]; + const RangeDescriptor& BestNextRange = InOutRanges[BestRangeIndexToCollapse + 1]; + BestRange.RangeLength = BestNextRange.RangeStart - BestRange.RangeStart + BestNextRange.RangeLength; + BestRange.ChunkBlockIndexCount = + BestNextRange.ChunkBlockIndexStart - BestRange.ChunkBlockIndexStart + BestNextRange.ChunkBlockIndexCount; + InOutRanges.erase(InOutRanges.begin() + BestRangeIndexToCollapse + 1); + } + + std::vector<RangeDescriptor> GetBlockRanges(const ChunkBlockDescription& BlockDescription, + const uint64_t ChunkStartOffsetInBlock, + std::span<const uint32_t> BlockChunkIndexNeeded) + { + ZEN_TRACE_CPU("GetBlockRanges"); + std::vector<RangeDescriptor> BlockRanges; + { + uint64_t CurrentOffset = 
ChunkStartOffsetInBlock; + uint32_t ChunkBlockIndex = 0; + uint32_t NeedBlockChunkIndexOffset = 0; + RangeDescriptor NextRange; + while (NeedBlockChunkIndexOffset < BlockChunkIndexNeeded.size() && ChunkBlockIndex < BlockDescription.ChunkRawHashes.size()) + { + const uint32_t ChunkCompressedLength = BlockDescription.ChunkCompressedLengths[ChunkBlockIndex]; + if (ChunkBlockIndex < BlockChunkIndexNeeded[NeedBlockChunkIndexOffset]) + { + if (NextRange.RangeLength > 0) + { + BlockRanges.push_back(NextRange); + NextRange = {}; + } + ChunkBlockIndex++; + CurrentOffset += ChunkCompressedLength; + } + else if (ChunkBlockIndex == BlockChunkIndexNeeded[NeedBlockChunkIndexOffset]) + { + if (NextRange.RangeLength == 0) + { + NextRange.RangeStart = CurrentOffset; + NextRange.ChunkBlockIndexStart = ChunkBlockIndex; + } + NextRange.RangeLength += ChunkCompressedLength; + NextRange.ChunkBlockIndexCount++; + ChunkBlockIndex++; + CurrentOffset += ChunkCompressedLength; + NeedBlockChunkIndexOffset++; + } + else + { + ZEN_ASSERT(false); + } + } + if (NextRange.RangeLength > 0) + { + BlockRanges.push_back(NextRange); + } + } + ZEN_ASSERT(!BlockRanges.empty()); + return BlockRanges; + } + + std::vector<RangeDescriptor> OptimizeRanges(uint64_t TotalBlockSize, + std::span<const RangeDescriptor> ExactRanges, + double LatencySec, + uint64_t SpeedBytesPerSec, + uint64_t MaxRangeCountPerRequest, + uint64_t MaxRangesPerBlock) + { + ZEN_TRACE_CPU("OptimizeRanges"); + ZEN_ASSERT(MaxRangesPerBlock > 0); + std::vector<RangeDescriptor> Ranges(ExactRanges.begin(), ExactRanges.end()); + + while (Ranges.size() > MaxRangesPerBlock) + { + MergeCheapestRange(Ranges); + } + + while (true) + { + const std::uint64_t RangeTotalSize = + std::accumulate(Ranges.begin(), Ranges.end(), uint64_t(0u), [](uint64_t Current, const RangeDescriptor& Value) { + return Current + Value.RangeLength; + }); + + const size_t RangeCount = Ranges.size(); + const uint64_t RequestCount = + MaxRangeCountPerRequest == (uint64_t)-1 ? 
1 : (RangeCount + MaxRangeCountPerRequest - 1) / MaxRangeCountPerRequest; + uint64_t RequestTimeAsBytes = uint64_t(SpeedBytesPerSec * RequestCount * LatencySec); + + if (RangeCount == 1) + { + // Does fetching the full block add less time than the time it takes to complete a single request? + if (TotalBlockSize - RangeTotalSize < SpeedBytesPerSec * LatencySec) + { + const std::uint64_t InitialRangeTotalSize = + std::accumulate(ExactRanges.begin(), + ExactRanges.end(), + uint64_t(0u), + [](uint64_t Current, const RangeDescriptor& Value) { return Current + Value.RangeLength; }); + + ZEN_DEBUG( + "Latency round trip takes as long as receiving the extra redundant bytes - go full block, dropping {} of slack, " + "adding {} of bytes to fetch, for block of size {}", + NiceBytes(TotalBlockSize - RangeTotalSize), + NiceBytes(TotalBlockSize - InitialRangeTotalSize), + NiceBytes(TotalBlockSize)); + return {}; + } + else + { + return Ranges; + } + } + + if (RequestTimeAsBytes < (TotalBlockSize - RangeTotalSize)) + { + return Ranges; + } + + if (RangeCount == 2) + { + // Merge to single range + Ranges.front().RangeLength = Ranges.back().RangeStart - Ranges.front().RangeStart + Ranges.back().RangeLength; + Ranges.front().ChunkBlockIndexCount = + Ranges.back().ChunkBlockIndexStart - Ranges.front().ChunkBlockIndexStart + Ranges.back().ChunkBlockIndexCount; + Ranges.pop_back(); + } + else + { + MergeCheapestRange(Ranges); + } + } + } + +} // namespace chunkblock_impl + ChunkBlockDescription ParseChunkBlockDescription(const CbObjectView& BlockObject) { @@ -455,9 +629,299 @@ FindReuseBlocks(OperationLogOutput& Output, return FilteredReuseBlockIndexes; } +ChunkBlockAnalyser::ChunkBlockAnalyser(OperationLogOutput& LogOutput, + std::span<const ChunkBlockDescription> BlockDescriptions, + const Options& Options) +: m_LogOutput(LogOutput) +, m_BlockDescriptions(BlockDescriptions) +, m_Options(Options) +{ +} + +std::vector<ChunkBlockAnalyser::NeededBlock> 
+ChunkBlockAnalyser::GetNeeded(const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex, + std::function<bool(uint32_t ChunkIndex)>&& NeedsBlockChunk) +{ + ZEN_TRACE_CPU("ChunkBlockAnalyser::GetNeeded"); + + std::vector<NeededBlock> Result; + + std::vector<bool> ChunkIsNeeded(ChunkHashToChunkIndex.size()); + for (uint32_t ChunkIndex = 0; ChunkIndex < ChunkHashToChunkIndex.size(); ChunkIndex++) + { + ChunkIsNeeded[ChunkIndex] = NeedsBlockChunk(ChunkIndex); + } + + std::vector<uint64_t> BlockSlack(m_BlockDescriptions.size(), 0u); + for (uint32_t BlockIndex = 0; BlockIndex < m_BlockDescriptions.size(); BlockIndex++) + { + const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex]; + + uint64_t BlockUsedSize = 0; + uint64_t BlockSize = 0; + + for (uint32_t ChunkBlockIndex = 0; ChunkBlockIndex < BlockDescription.ChunkRawHashes.size(); ChunkBlockIndex++) + { + const IoHash& ChunkHash = BlockDescription.ChunkRawHashes[ChunkBlockIndex]; + if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end()) + { + const uint32_t RemoteChunkIndex = It->second; + if (ChunkIsNeeded[RemoteChunkIndex]) + { + BlockUsedSize += BlockDescription.ChunkCompressedLengths[ChunkBlockIndex]; + } + } + BlockSize += BlockDescription.ChunkCompressedLengths[ChunkBlockIndex]; + } + BlockSlack[BlockIndex] = BlockSize - BlockUsedSize; + } + + std::vector<uint32_t> BlockOrder(m_BlockDescriptions.size()); + std::iota(BlockOrder.begin(), BlockOrder.end(), 0); + + std::sort(BlockOrder.begin(), BlockOrder.end(), [&BlockSlack](uint32_t Lhs, uint32_t Rhs) { + return BlockSlack[Lhs] < BlockSlack[Rhs]; + }); + + std::vector<bool> ChunkIsPickedUp(ChunkHashToChunkIndex.size(), false); + + for (uint32_t BlockIndex : BlockOrder) + { + const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex]; + + std::vector<uint32_t> BlockChunkIndexNeeded; + + for (uint32_t ChunkBlockIndex = 0; ChunkBlockIndex < 
BlockDescription.ChunkRawHashes.size(); ChunkBlockIndex++) + { + const IoHash& ChunkHash = BlockDescription.ChunkRawHashes[ChunkBlockIndex]; + if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end()) + { + const uint32_t RemoteChunkIndex = It->second; + if (ChunkIsNeeded[RemoteChunkIndex]) + { + if (!ChunkIsPickedUp[RemoteChunkIndex]) + { + ChunkIsPickedUp[RemoteChunkIndex] = true; + BlockChunkIndexNeeded.push_back(ChunkBlockIndex); + } + } + } + else + { + ZEN_DEBUG("Chunk {} not found in block {}", ChunkHash, BlockDescription.BlockHash); + } + } + + if (!BlockChunkIndexNeeded.empty()) + { + Result.push_back(NeededBlock{.BlockIndex = BlockIndex, .ChunkIndexes = std::move(BlockChunkIndexNeeded)}); + } + } + return Result; +} + +ChunkBlockAnalyser::BlockResult +ChunkBlockAnalyser::CalculatePartialBlockDownloads(std::span<const NeededBlock> NeededBlocks, + std::span<const EPartialBlockDownloadMode> BlockPartialDownloadModes) +{ + ZEN_TRACE_CPU("ChunkBlockAnalyser::CalculatePartialBlockDownloads"); + + Stopwatch PartialAnalisysTimer; + + ChunkBlockAnalyser::BlockResult Result; + + { + uint64_t MinRequestCount = 0; + uint64_t RequestCount = 0; + uint64_t RangeCount = 0; + uint64_t IdealDownloadTotalSize = 0; + uint64_t ActualDownloadTotalSize = 0; + uint64_t FullDownloadTotalSize = 0; + for (const NeededBlock& NeededBlock : NeededBlocks) + { + const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[NeededBlock.BlockIndex]; + std::span<const uint32_t> BlockChunkIndexNeeded(NeededBlock.ChunkIndexes); + const uint32_t ChunkStartOffsetInBlock = + gsl::narrow<uint32_t>(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize); + uint64_t TotalBlockSize = std::accumulate(BlockDescription.ChunkCompressedLengths.begin(), + BlockDescription.ChunkCompressedLengths.end(), + uint64_t(ChunkStartOffsetInBlock)); + uint64_t ExactRangesSize = 0; + uint64_t DownloadRangesSize = 0; + uint64_t FullDownloadSize = 0; + + bool 
CanDoPartialBlockDownload = (BlockDescription.HeaderSize > 0) && + (BlockDescription.ChunkCompressedLengths.size() == BlockDescription.ChunkRawHashes.size()); + + if (NeededBlock.ChunkIndexes.size() == BlockDescription.ChunkRawHashes.size() || !CanDoPartialBlockDownload) + { + // Full block + ExactRangesSize = TotalBlockSize; + DownloadRangesSize = TotalBlockSize; + FullDownloadSize = TotalBlockSize; + MinRequestCount++; + RequestCount++; + RangeCount++; + Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex); + } + else if (NeededBlock.ChunkIndexes.empty()) + { + // Not needed + } + else + { + FullDownloadSize = TotalBlockSize; + std::vector<chunkblock_impl::RangeDescriptor> Ranges = + chunkblock_impl::GetBlockRanges(BlockDescription, ChunkStartOffsetInBlock, BlockChunkIndexNeeded); + ExactRangesSize = std::accumulate( + Ranges.begin(), + Ranges.end(), + uint64_t(0), + [](uint64_t Current, const chunkblock_impl::RangeDescriptor& Range) { return Current + Range.RangeLength; }); + + EPartialBlockDownloadMode PartialBlockDownloadMode = BlockPartialDownloadModes[NeededBlock.BlockIndex]; + if (PartialBlockDownloadMode == EPartialBlockDownloadMode::Off) + { + // Use full block + MinRequestCount++; + RangeCount++; + RequestCount++; + Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex); + DownloadRangesSize = TotalBlockSize; + } + else + { + const bool IsHighSpeed = (PartialBlockDownloadMode == EPartialBlockDownloadMode::MultiRangeHighSpeed); + uint64_t MaxRangeCountPerRequest = + IsHighSpeed ? 
m_Options.HostHighSpeedMaxRangeCountPerRequest : m_Options.HostMaxRangeCountPerRequest; + ZEN_ASSERT(MaxRangeCountPerRequest != 0); + + if (PartialBlockDownloadMode == EPartialBlockDownloadMode::Exact) + { + // Use exact ranges + for (const chunkblock_impl::RangeDescriptor& Range : Ranges) + { + Result.BlockRanges.push_back(BlockRangeDescriptor{.BlockIndex = NeededBlock.BlockIndex, + .RangeStart = Range.RangeStart, + .RangeLength = Range.RangeLength, + .ChunkBlockIndexStart = Range.ChunkBlockIndexStart, + .ChunkBlockIndexCount = Range.ChunkBlockIndexCount}); + } + + MinRequestCount++; + RangeCount += Ranges.size(); + RequestCount += MaxRangeCountPerRequest == (uint64_t)-1 + ? 1 + : (Ranges.size() + MaxRangeCountPerRequest - 1) / MaxRangeCountPerRequest; + DownloadRangesSize = ExactRangesSize; + } + else + { + if (PartialBlockDownloadMode == EPartialBlockDownloadMode::SingleRange) + { + // Use single range + if (Ranges.size() > 1) + { + Ranges = {chunkblock_impl::RangeDescriptor{ + .RangeStart = Ranges.front().RangeStart, + .RangeLength = Ranges.back().RangeStart + Ranges.back().RangeLength - Ranges.front().RangeStart, + .ChunkBlockIndexStart = Ranges.front().ChunkBlockIndexStart, + .ChunkBlockIndexCount = Ranges.back().ChunkBlockIndexStart + Ranges.back().ChunkBlockIndexCount - + Ranges.front().ChunkBlockIndexStart}}; + } + + // We still do the optimize pass to see if it is more effective to use a full block + } + + double LatencySec = IsHighSpeed ? m_Options.HostHighSpeedLatencySec : m_Options.HostLatencySec; + uint64_t SpeedBytesPerSec = IsHighSpeed ? 
m_Options.HostHighSpeedBytesPerSec : m_Options.HostSpeedBytesPerSec; + if (LatencySec > 0.0 && SpeedBytesPerSec > 0u) + { + Ranges = chunkblock_impl::OptimizeRanges(TotalBlockSize, + Ranges, + LatencySec, + SpeedBytesPerSec, + MaxRangeCountPerRequest, + m_Options.MaxRangesPerBlock); + } + + MinRequestCount++; + if (Ranges.empty()) + { + Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex); + RequestCount++; + RangeCount++; + DownloadRangesSize = TotalBlockSize; + } + else + { + for (const chunkblock_impl::RangeDescriptor& Range : Ranges) + { + Result.BlockRanges.push_back(BlockRangeDescriptor{.BlockIndex = NeededBlock.BlockIndex, + .RangeStart = Range.RangeStart, + .RangeLength = Range.RangeLength, + .ChunkBlockIndexStart = Range.ChunkBlockIndexStart, + .ChunkBlockIndexCount = Range.ChunkBlockIndexCount}); + } + RangeCount += Ranges.size(); + RequestCount += MaxRangeCountPerRequest == (uint64_t)-1 + ? 1 + : (Ranges.size() + MaxRangeCountPerRequest - 1) / MaxRangeCountPerRequest; + } + + DownloadRangesSize = Ranges.empty() + ? TotalBlockSize + : std::accumulate(Ranges.begin(), + Ranges.end(), + uint64_t(0), + [](uint64_t Current, const chunkblock_impl::RangeDescriptor& Range) { + return Current + Range.RangeLength; + }); + } + } + } + IdealDownloadTotalSize += ExactRangesSize; + ActualDownloadTotalSize += DownloadRangesSize; + FullDownloadTotalSize += FullDownloadSize; + + if (ExactRangesSize < FullDownloadSize) + { + ZEN_DEBUG("Block {}: Full: {}, Ideal: {}, Actual: {}, Saves: {}", + NeededBlock.BlockIndex, + NiceBytes(FullDownloadSize), + NiceBytes(ExactRangesSize), + NiceBytes(DownloadRangesSize), + NiceBytes(FullDownloadSize - DownloadRangesSize)); + } + } + uint64_t Actual = FullDownloadTotalSize - ActualDownloadTotalSize; + uint64_t Ideal = FullDownloadTotalSize - IdealDownloadTotalSize; + if (Ideal < FullDownloadTotalSize && !m_Options.IsQuiet) + { + const double AchievedPercent = Ideal == 0 ? 
100.0 : (100.0 * Actual) / Ideal; + ZEN_OPERATION_LOG_INFO(m_LogOutput, + "Block Partial Analysis: Blocks: {}, Full: {}, Ideal: {}, Actual: {}. Skipping {} ({:.1f}%) out of " + "possible {} using {} extra ranges " + "via {} extra requests. Completed in {}", + NeededBlocks.size(), + NiceBytes(FullDownloadTotalSize), + NiceBytes(IdealDownloadTotalSize), + NiceBytes(ActualDownloadTotalSize), + NiceBytes(FullDownloadTotalSize - ActualDownloadTotalSize), + AchievedPercent, + NiceBytes(Ideal), + RangeCount - MinRequestCount, + RequestCount - MinRequestCount, + NiceTimeSpanMs(PartialAnalisysTimer.GetElapsedTimeMs())); + } + } + + return Result; +} + #if ZEN_WITH_TESTS -namespace testutils { +namespace chunkblock_testutils { static std::vector<std::pair<Oid, CompressedBuffer>> CreateAttachments( const std::span<const size_t>& Sizes, OodleCompressionLevel CompressionLevel = OodleCompressionLevel::VeryFast, @@ -474,12 +938,14 @@ namespace testutils { return Result; } -} // namespace testutils +} // namespace chunkblock_testutils + +TEST_SUITE_BEGIN("remotestore.chunkblock"); -TEST_CASE("project.store.block") +TEST_CASE("chunkblock.block") { using namespace std::literals; - using namespace testutils; + using namespace chunkblock_testutils; std::vector<std::size_t> AttachmentSizes({7633, 6825, 5738, 8031, 7225, 566, 3656, 6006, 24, 3466, 1093, 4269, 2257, 3685, 3489, 7194, 6151, 5482, 6217, 3511, 6738, 5061, 7537, 2759, 1916, 8210, 2235, 4024, 1582, 5251, @@ -504,10 +970,10 @@ TEST_CASE("project.store.block") HeaderSize)); } -TEST_CASE("project.store.reuseblocks") +TEST_CASE("chunkblock.reuseblocks") { using namespace std::literals; - using namespace testutils; + using namespace chunkblock_testutils; std::vector<std::vector<std::size_t>> BlockAttachmentSizes( {std::vector<std::size_t>{7633, 6825, 5738, 8031, 7225, 566, 3656, 6006, 24, 3466, 1093, 4269, 2257, 3685, 3489, @@ -744,6 +1210,894 @@ TEST_CASE("project.store.reuseblocks") } } +namespace chunkblock_analyser_testutils { 
+ + // Build a ChunkBlockDescription without any real payload. + // Hashes are derived deterministically from (BlockSeed XOR ChunkIndex) so that the same + // seed produces the same hashes — useful for deduplication tests. + static ChunkBlockDescription MakeBlockDesc(uint64_t HeaderSize, + std::initializer_list<uint32_t> CompressedLengths, + uint32_t BlockSeed = 0) + { + ChunkBlockDescription Desc; + Desc.HeaderSize = HeaderSize; + uint32_t ChunkIndex = 0; + for (uint32_t Length : CompressedLengths) + { + uint64_t HashInput = uint64_t(BlockSeed ^ ChunkIndex); + Desc.ChunkRawHashes.push_back(IoHash::HashBuffer(MemoryView(&HashInput, sizeof(HashInput)))); + Desc.ChunkRawLengths.push_back(Length); + Desc.ChunkCompressedLengths.push_back(Length); + ChunkIndex++; + } + return Desc; + } + + // Build the robin_map<IoHash, uint32_t> needed by GetNeeded from a flat list of blocks. + // First occurrence of each hash wins; index is assigned sequentially across all blocks. + [[maybe_unused]] static tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> MakeHashMap(const std::vector<ChunkBlockDescription>& Blocks) + { + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> Result; + uint32_t Index = 0; + for (const ChunkBlockDescription& Block : Blocks) + { + for (const IoHash& Hash : Block.ChunkRawHashes) + { + if (!Result.contains(Hash)) + { + Result.emplace(Hash, Index++); + } + } + } + return Result; + } + +} // namespace chunkblock_analyser_testutils + +TEST_CASE("chunkblock.mergecheapestrange.picks_smallest_gap") +{ + using RD = chunkblock_impl::RangeDescriptor; + // Gap between ranges 0-1 is 50, gap between 1-2 is 150 → pair 0-1 gets merged + std::vector<RD> Ranges = { + {.RangeStart = 0, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1}, + {.RangeStart = 150, .RangeLength = 100, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1}, + {.RangeStart = 400, .RangeLength = 100, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1}, + }; + 
chunkblock_impl::MergeCheapestRange(Ranges); + + REQUIRE_EQ(2u, Ranges.size()); + CHECK_EQ(0u, Ranges[0].RangeStart); + CHECK_EQ(250u, Ranges[0].RangeLength); // 150+100 + CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart); + CHECK_EQ(2u, Ranges[0].ChunkBlockIndexCount); + CHECK_EQ(400u, Ranges[1].RangeStart); + CHECK_EQ(100u, Ranges[1].RangeLength); + CHECK_EQ(2u, Ranges[1].ChunkBlockIndexStart); + CHECK_EQ(1u, Ranges[1].ChunkBlockIndexCount); +} + +TEST_CASE("chunkblock.mergecheapestrange.tiebreak_smaller_merged") +{ + using RD = chunkblock_impl::RangeDescriptor; + // Gap 0-1 == gap 1-2 == 100; merged size 0-1 (250) < merged size 1-2 (350) → pair 0-1 wins + std::vector<RD> Ranges = { + {.RangeStart = 0, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1}, + {.RangeStart = 200, .RangeLength = 50, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1}, + {.RangeStart = 350, .RangeLength = 200, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1}, + }; + chunkblock_impl::MergeCheapestRange(Ranges); + + REQUIRE_EQ(2u, Ranges.size()); + // Pair 0-1 merged: start=0, length = (200+50)-0 = 250 + CHECK_EQ(0u, Ranges[0].RangeStart); + CHECK_EQ(250u, Ranges[0].RangeLength); + CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart); + CHECK_EQ(2u, Ranges[0].ChunkBlockIndexCount); + // Pair 1 unchanged (was index 2) + CHECK_EQ(350u, Ranges[1].RangeStart); + CHECK_EQ(200u, Ranges[1].RangeLength); + CHECK_EQ(2u, Ranges[1].ChunkBlockIndexStart); + CHECK_EQ(1u, Ranges[1].ChunkBlockIndexCount); +} + +TEST_CASE("chunkblock.optimizeranges.preserves_ranges_low_latency") +{ + using RD = chunkblock_impl::RangeDescriptor; + // With MaxRangeCountPerRequest unlimited, RequestCount=1 + // RequestTimeAsBytes = 100000 * 1 * 0.001 = 100 << slack=7000 → all ranges preserved + std::vector<RD> ExactRanges = { + {.RangeStart = 0, .RangeLength = 1000, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1}, + {.RangeStart = 2000, .RangeLength = 1000, .ChunkBlockIndexStart = 1, 
.ChunkBlockIndexCount = 1}, + {.RangeStart = 4000, .RangeLength = 1000, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1}, + }; + uint64_t TotalBlockSize = 10000; + double LatencySec = 0.001; + uint64_t SpeedBytesPerSec = 100000; + uint64_t MaxRangeCountPerReq = (uint64_t)-1; + uint64_t MaxRangesPerBlock = 1024; + + auto Result = + chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock); + + REQUIRE_EQ(3u, Result.size()); +} + +TEST_CASE("chunkblock.optimizeranges.falls_back_to_full_block") +{ + using RD = chunkblock_impl::RangeDescriptor; + // 1 range already; slack=100 < SpeedBytesPerSec*LatencySec=200 → full block (empty result) + std::vector<RD> ExactRanges = { + {.RangeStart = 100, .RangeLength = 900, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 3}, + }; + uint64_t TotalBlockSize = 1000; + double LatencySec = 0.01; + uint64_t SpeedBytesPerSec = 20000; + uint64_t MaxRangeCountPerReq = (uint64_t)-1; + uint64_t MaxRangesPerBlock = 1024; + + auto Result = + chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock); + + CHECK(Result.empty()); +} + +TEST_CASE("chunkblock.optimizeranges.maxrangesperblock_clamp") +{ + using RD = chunkblock_impl::RangeDescriptor; + // 5 input ranges; MaxRangesPerBlock=2 clamps to ≤2 before the cost model runs + std::vector<RD> ExactRanges = { + {.RangeStart = 0, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1}, + {.RangeStart = 300, .RangeLength = 100, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1}, + {.RangeStart = 600, .RangeLength = 100, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1}, + {.RangeStart = 900, .RangeLength = 100, .ChunkBlockIndexStart = 3, .ChunkBlockIndexCount = 1}, + {.RangeStart = 1200, .RangeLength = 100, .ChunkBlockIndexStart = 4, .ChunkBlockIndexCount = 1}, + }; + uint64_t TotalBlockSize = 5000; + double LatencySec = 
0.001; + uint64_t SpeedBytesPerSec = 100000; + uint64_t MaxRangeCountPerReq = (uint64_t)-1; + uint64_t MaxRangesPerBlock = 2; + + auto Result = + chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock); + + CHECK(Result.size() <= 2u); + CHECK(!Result.empty()); +} + +TEST_CASE("chunkblock.optimizeranges.low_maxrangecountperrequest_drives_merge") +{ + using RD = chunkblock_impl::RangeDescriptor; + // MaxRangeCountPerRequest=1 means RequestCount==RangeCount; high latency drives merging + // With MaxRangeCountPerRequest=-1 the same 3 ranges would be preserved (verified by comment below) + std::vector<RD> ExactRanges = { + {.RangeStart = 100, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1}, + {.RangeStart = 250, .RangeLength = 100, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1}, + {.RangeStart = 400, .RangeLength = 100, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1}, + }; + uint64_t TotalBlockSize = 1000; + double LatencySec = 1.0; + uint64_t SpeedBytesPerSec = 500; + // With MaxRangeCountPerRequest=-1: RequestCount=1, RequestTimeAsBytes=500 < slack=700 → preserved + // With MaxRangeCountPerRequest=1: RequestCount=3, RequestTimeAsBytes=1500 > slack=700 → merged + uint64_t MaxRangesPerBlock = 1024; + + auto Unlimited = + chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, (uint64_t)-1, MaxRangesPerBlock); + CHECK_EQ(3u, Unlimited.size()); + + auto Limited = + chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, uint64_t(1), MaxRangesPerBlock); + CHECK(Limited.size() < 3u); +} + +TEST_CASE("chunkblock.optimizeranges.unlimited_rangecountperrequest_no_extra_cost") +{ + using RD = chunkblock_impl::RangeDescriptor; + // MaxRangeCountPerRequest=-1 → RequestCount always 1, even with many ranges and high latency + std::vector<RD> ExactRanges = { + {.RangeStart = 0, .RangeLength = 50, 
.ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1}, + {.RangeStart = 200, .RangeLength = 50, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1}, + {.RangeStart = 400, .RangeLength = 50, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1}, + {.RangeStart = 600, .RangeLength = 50, .ChunkBlockIndexStart = 3, .ChunkBlockIndexCount = 1}, + {.RangeStart = 800, .RangeLength = 50, .ChunkBlockIndexStart = 4, .ChunkBlockIndexCount = 1}, + }; + uint64_t TotalBlockSize = 5000; + double LatencySec = 0.1; + uint64_t SpeedBytesPerSec = 10000; // RequestTimeAsBytes=1000 << slack=4750 + uint64_t MaxRangeCountPerReq = (uint64_t)-1; + uint64_t MaxRangesPerBlock = 1024; + + auto Result = + chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock); + + CHECK_EQ(5u, Result.size()); +} + +TEST_CASE("chunkblock.optimizeranges.two_range_direct_merge_path") +{ + using RD = chunkblock_impl::RangeDescriptor; + // Exactly 2 ranges; cost model demands merge; exercises the RangeCount==2 direct-merge branch + // After direct merge → 1 range with small slack → full block (empty) + std::vector<RD> ExactRanges = { + {.RangeStart = 0, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 2}, + {.RangeStart = 400, .RangeLength = 100, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 2}, + }; + uint64_t TotalBlockSize = 600; + double LatencySec = 0.1; + uint64_t SpeedBytesPerSec = 5000; // RequestTimeAsBytes=500 > slack=400 on first iter + uint64_t MaxRangeCountPerReq = (uint64_t)-1; + uint64_t MaxRangesPerBlock = 1024; + + // Iteration 1: RangeCount=2, RequestCount=1, RequestTimeAsBytes=500 > slack=400 → direct merge + // After merge: 1 range [{0,500,0,4}], slack=100 < Speed*Lat=500 → full block + auto Result = + chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock); + + CHECK(Result.empty()); +} + 
+TEST_CASE("chunkblock.getneeded.all_chunks") +{ + using namespace chunkblock_analyser_testutils; + + LoggerRef LogRef = Log(); + std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef)); + + auto Block = MakeBlockDesc(50, {100, 100, 100, 100}); + ChunkBlockAnalyser::Options Options; + ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + auto HashMap = MakeHashMap({Block}); + auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t) { return true; }); + + REQUIRE_EQ(1u, NeededBlocks.size()); + CHECK_EQ(0u, NeededBlocks[0].BlockIndex); + REQUIRE_EQ(4u, NeededBlocks[0].ChunkIndexes.size()); + CHECK_EQ(0u, NeededBlocks[0].ChunkIndexes[0]); + CHECK_EQ(1u, NeededBlocks[0].ChunkIndexes[1]); + CHECK_EQ(2u, NeededBlocks[0].ChunkIndexes[2]); + CHECK_EQ(3u, NeededBlocks[0].ChunkIndexes[3]); +} + +TEST_CASE("chunkblock.getneeded.no_chunks") +{ + using namespace chunkblock_analyser_testutils; + + LoggerRef LogRef = Log(); + std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef)); + + auto Block = MakeBlockDesc(50, {100, 100, 100, 100}); + ChunkBlockAnalyser::Options Options; + ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + auto HashMap = MakeHashMap({Block}); + auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t) { return false; }); + + CHECK(NeededBlocks.empty()); +} + +TEST_CASE("chunkblock.getneeded.subset_within_block") +{ + using namespace chunkblock_analyser_testutils; + + LoggerRef LogRef = Log(); + std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef)); + + auto Block = MakeBlockDesc(50, {100, 100, 100, 100}); + ChunkBlockAnalyser::Options Options; + ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + auto HashMap = MakeHashMap({Block}); + // Indices 0 and 2 are needed; 1 and 3 are not + auto NeededBlocks = 
Analyser.GetNeeded(HashMap, [](uint32_t ChunkIndex) { return ChunkIndex == 0 || ChunkIndex == 2; }); + + REQUIRE_EQ(1u, NeededBlocks.size()); + CHECK_EQ(0u, NeededBlocks[0].BlockIndex); + REQUIRE_EQ(2u, NeededBlocks[0].ChunkIndexes.size()); + CHECK_EQ(0u, NeededBlocks[0].ChunkIndexes[0]); + CHECK_EQ(2u, NeededBlocks[0].ChunkIndexes[1]); +} + +TEST_CASE("chunkblock.getneeded.dedup_low_slack_wins") +{ + using namespace chunkblock_analyser_testutils; + + LoggerRef LogRef = Log(); + std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef)); + + // Block 0: {H0, H1, SharedH, H3} — 3 of 4 needed (H3 not needed); slack = 100 + // Block 1: {H4, H5, SharedH, H6} — only SharedH needed; slack = 300 + // Block 0 has less slack → processed first → SharedH assigned to block 0 + IoHash SharedH = IoHash::HashBuffer(MemoryView("shared_chunk_dedup", 18)); + IoHash H0 = IoHash::HashBuffer(MemoryView("block0_chunk0", 13)); + IoHash H1 = IoHash::HashBuffer(MemoryView("block0_chunk1", 13)); + IoHash H3 = IoHash::HashBuffer(MemoryView("block0_chunk3", 13)); + IoHash H4 = IoHash::HashBuffer(MemoryView("block1_chunk0", 13)); + IoHash H5 = IoHash::HashBuffer(MemoryView("block1_chunk1", 13)); + IoHash H6 = IoHash::HashBuffer(MemoryView("block1_chunk3", 13)); + + ChunkBlockDescription Block0; + Block0.HeaderSize = 50; + Block0.ChunkRawHashes = {H0, H1, SharedH, H3}; + Block0.ChunkRawLengths = {100, 100, 100, 100}; + Block0.ChunkCompressedLengths = {100, 100, 100, 100}; + + ChunkBlockDescription Block1; + Block1.HeaderSize = 50; + Block1.ChunkRawHashes = {H4, H5, SharedH, H6}; + Block1.ChunkRawLengths = {100, 100, 100, 100}; + Block1.ChunkCompressedLengths = {100, 100, 100, 100}; + + std::vector<ChunkBlockDescription> Blocks = {Block0, Block1}; + ChunkBlockAnalyser::Options Options; + ChunkBlockAnalyser Analyser(*LogOutput, Blocks, Options); + + // Map: H0→0, H1→1, SharedH→2, H3→3, H4→4, H5→5, H6→6 + auto HashMap = MakeHashMap(Blocks); + // Need H0(0), H1(1), SharedH(2) 
from block 0; SharedH from block 1 (already index 2) + // H3(3) not needed; H4,H5,H6 not needed + auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t ChunkIndex) { return ChunkIndex <= 2; }); + + // Block 0 slack=100 (H3 unused), block 1 slack=300 (H4,H5,H6 unused) + // Block 0 processed first; picks up H0, H1, SharedH + // Block 1 tries SharedH but it's already picked up → empty → not added + REQUIRE_EQ(1u, NeededBlocks.size()); + CHECK_EQ(0u, NeededBlocks[0].BlockIndex); + REQUIRE_EQ(3u, NeededBlocks[0].ChunkIndexes.size()); + CHECK_EQ(0u, NeededBlocks[0].ChunkIndexes[0]); + CHECK_EQ(1u, NeededBlocks[0].ChunkIndexes[1]); + CHECK_EQ(2u, NeededBlocks[0].ChunkIndexes[2]); +} + +TEST_CASE("chunkblock.getneeded.dedup_no_double_pickup") +{ + using namespace chunkblock_analyser_testutils; + + LoggerRef LogRef = Log(); + std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef)); + + // SharedH appears in both blocks; should appear in the result exactly once + IoHash SharedH = IoHash::HashBuffer(MemoryView("shared_chunk_nodup", 18)); + IoHash H0 = IoHash::HashBuffer(MemoryView("unique_chunk_b0", 15)); + IoHash H1 = IoHash::HashBuffer(MemoryView("unique_chunk_b1a", 16)); + IoHash H2 = IoHash::HashBuffer(MemoryView("unique_chunk_b1b", 16)); + IoHash H3 = IoHash::HashBuffer(MemoryView("unique_chunk_b1c", 16)); + + ChunkBlockDescription Block0; + Block0.HeaderSize = 50; + Block0.ChunkRawHashes = {SharedH, H0}; + Block0.ChunkRawLengths = {100, 100}; + Block0.ChunkCompressedLengths = {100, 100}; + + ChunkBlockDescription Block1; + Block1.HeaderSize = 50; + Block1.ChunkRawHashes = {H1, H2, H3, SharedH}; + Block1.ChunkRawLengths = {100, 100, 100, 100}; + Block1.ChunkCompressedLengths = {100, 100, 100, 100}; + + std::vector<ChunkBlockDescription> Blocks = {Block0, Block1}; + ChunkBlockAnalyser::Options Options; + ChunkBlockAnalyser Analyser(*LogOutput, Blocks, Options); + + // Map: SharedH→0, H0→1, H1→2, H2→3, H3→4 + // Only SharedH (index 0) needed; 
no other chunks + auto HashMap = MakeHashMap(Blocks); + auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t ChunkIndex) { return ChunkIndex == 0; }); + + // Block 0: SharedH needed, H0 not needed → slack=100 + // Block 1: SharedH needed, H1/H2/H3 not needed → slack=300 + // Block 0 processed first → picks up SharedH; Block 1 skips it + + // Count total occurrences of SharedH across all NeededBlocks + uint32_t SharedOccurrences = 0; + for (const auto& NB : NeededBlocks) + { + for (uint32_t Idx : NB.ChunkIndexes) + { + // SharedH is at block-local index 0 in Block0 and index 3 in Block1 + (void)Idx; + SharedOccurrences++; + } + } + CHECK_EQ(1u, SharedOccurrences); + REQUIRE_EQ(1u, NeededBlocks.size()); + CHECK_EQ(0u, NeededBlocks[0].BlockIndex); +} + +TEST_CASE("chunkblock.getneeded.skips_unrequested_chunks") +{ + using namespace chunkblock_analyser_testutils; + + LoggerRef LogRef = Log(); + std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef)); + + // Block has 4 chunks but only 2 appear in the hash map → ChunkIndexes has exactly those 2 + auto Block = MakeBlockDesc(50, {100, 100, 100, 100}); + ChunkBlockAnalyser::Options Options; + ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + // Only put chunks at positions 0 and 2 in the map + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> HashMap; + HashMap.emplace(Block.ChunkRawHashes[0], 0u); + HashMap.emplace(Block.ChunkRawHashes[2], 1u); + + auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t) { return true; }); + + REQUIRE_EQ(1u, NeededBlocks.size()); + CHECK_EQ(0u, NeededBlocks[0].BlockIndex); + REQUIRE_EQ(2u, NeededBlocks[0].ChunkIndexes.size()); + CHECK_EQ(0u, NeededBlocks[0].ChunkIndexes[0]); + CHECK_EQ(2u, NeededBlocks[0].ChunkIndexes[1]); +} + +TEST_CASE("chunkblock.getneeded.two_blocks_both_contribute") +{ + using namespace chunkblock_analyser_testutils; + + LoggerRef LogRef = Log(); + 
std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef)); + + // Block 0: all 4 needed (slack=0); block 1: 3 of 4 needed (slack=100) + // Both blocks contribute chunks → 2 NeededBlocks in result + auto Block0 = MakeBlockDesc(50, {100, 100, 100, 100}, /*BlockSeed=*/0); + auto Block1 = MakeBlockDesc(50, {100, 100, 100, 100}, /*BlockSeed=*/200); + + std::vector<ChunkBlockDescription> Blocks = {Block0, Block1}; + ChunkBlockAnalyser::Options Options; + ChunkBlockAnalyser Analyser(*LogOutput, Blocks, Options); + + // HashMap: Block0 hashes → indices 0-3, Block1 hashes → indices 4-7 + auto HashMap = MakeHashMap(Blocks); + // Need all Block0 chunks (0-3) and Block1 chunks 0-2 (indices 4-6); not chunk index 7 (Block1 chunk 3) + auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t ChunkIndex) { return ChunkIndex <= 6; }); + + // REQUIRE (not CHECK): the assertions below index NeededBlocks[0] and NeededBlocks[1], so stop here if the size is wrong + REQUIRE_EQ(2u, NeededBlocks.size()); + // Block 0 has slack=0 (all 4 needed), Block 1 has slack=100 (1 not needed) + // Block 0 comes first in result + CHECK_EQ(0u, NeededBlocks[0].BlockIndex); + CHECK_EQ(4u, NeededBlocks[0].ChunkIndexes.size()); + CHECK_EQ(1u, NeededBlocks[1].BlockIndex); + CHECK_EQ(3u, NeededBlocks[1].ChunkIndexes.size()); +} + +TEST_CASE("chunkblock.calc.off_mode") +{ + using namespace chunkblock_analyser_testutils; + using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode; + + LoggerRef LogRef = Log(); + std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef)); + + // HeaderSize > 0, chunks size matches → CanDoPartialBlockDownload = true + // But mode Off forces full block regardless + auto Block = MakeBlockDesc(50, {100, 200, 300, 400}); + ChunkBlockAnalyser::Options Options; + Options.IsQuiet = true; + ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}}; + std::vector<Mode> Modes = {Mode::Off}; + + auto Result =
Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes); + + REQUIRE_EQ(1u, Result.FullBlockIndexes.size()); + CHECK_EQ(0u, Result.FullBlockIndexes[0]); + CHECK(Result.BlockRanges.empty()); +} + +TEST_CASE("chunkblock.calc.exact_mode") +{ + using namespace chunkblock_analyser_testutils; + using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode; + + LoggerRef LogRef = Log(); + std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef)); + + auto Block = MakeBlockDesc(50, {100, 200, 300, 400}); + ChunkBlockAnalyser::Options Options; + Options.IsQuiet = true; + ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize; + + // Need chunks 0 and 2 → 2 non-contiguous ranges; Exact mode passes them straight through + std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}}; + std::vector<Mode> Modes = {Mode::Exact}; + + auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes); + + CHECK(Result.FullBlockIndexes.empty()); + REQUIRE_EQ(2u, Result.BlockRanges.size()); + + CHECK_EQ(0u, Result.BlockRanges[0].BlockIndex); + CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart); + CHECK_EQ(100u, Result.BlockRanges[0].RangeLength); + CHECK_EQ(0u, Result.BlockRanges[0].ChunkBlockIndexStart); + CHECK_EQ(1u, Result.BlockRanges[0].ChunkBlockIndexCount); + + CHECK_EQ(0u, Result.BlockRanges[1].BlockIndex); + CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[1].RangeStart); // 100+200 before chunk 2 + CHECK_EQ(300u, Result.BlockRanges[1].RangeLength); + CHECK_EQ(2u, Result.BlockRanges[1].ChunkBlockIndexStart); + CHECK_EQ(1u, Result.BlockRanges[1].ChunkBlockIndexCount); +} + +TEST_CASE("chunkblock.calc.singlerange_mode") +{ + using namespace chunkblock_analyser_testutils; + using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode; + + LoggerRef 
LogRef = Log(); + std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef)); + + auto Block = MakeBlockDesc(50, {100, 200, 300, 400}); + // Default HostLatencySec=-1 → OptimizeRanges not called after SingleRange collapse + ChunkBlockAnalyser::Options Options; + Options.IsQuiet = true; + ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize; + + // Need chunks 0 and 2 → 2 ranges that get collapsed to 1 + std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}}; + std::vector<Mode> Modes = {Mode::SingleRange}; + + auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes); + + CHECK(Result.FullBlockIndexes.empty()); + REQUIRE_EQ(1u, Result.BlockRanges.size()); + CHECK_EQ(0u, Result.BlockRanges[0].BlockIndex); + CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart); + // Spans from chunk 0 start to chunk 2 end: 100+200+300=600 + CHECK_EQ(600u, Result.BlockRanges[0].RangeLength); + CHECK_EQ(0u, Result.BlockRanges[0].ChunkBlockIndexStart); + // ChunkBlockIndexCount = (2+1) - 0 = 3 + CHECK_EQ(3u, Result.BlockRanges[0].ChunkBlockIndexCount); +} + +TEST_CASE("chunkblock.calc.multirange_mode") +{ + using namespace chunkblock_analyser_testutils; + using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode; + + LoggerRef LogRef = Log(); + std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef)); + + auto Block = MakeBlockDesc(50, {100, 200, 300, 400}); + // Low latency: RequestTimeAsBytes=100 << slack → OptimizeRanges preserves ranges + ChunkBlockAnalyser::Options Options; + Options.IsQuiet = true; + Options.HostLatencySec = 0.001; + Options.HostSpeedBytesPerSec = 100000; + ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + uint64_t ChunkStartOffset = 
CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize; + + std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}}; + std::vector<Mode> Modes = {Mode::MultiRange}; + + auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes); + + CHECK(Result.FullBlockIndexes.empty()); + REQUIRE_EQ(2u, Result.BlockRanges.size()); + CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart); + CHECK_EQ(100u, Result.BlockRanges[0].RangeLength); + CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[1].RangeStart); + CHECK_EQ(300u, Result.BlockRanges[1].RangeLength); +} + +TEST_CASE("chunkblock.calc.multirangehighspeed_mode") +{ + using namespace chunkblock_analyser_testutils; + using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode; + + LoggerRef LogRef = Log(); + std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef)); + + auto Block = MakeBlockDesc(50, {100, 200, 300, 400}); + // Block slack ≈ 714 bytes (TotalBlockSize≈1114, RangeTotalSize=400 for chunks 0+2) + // RequestTimeAsBytes = 400000 * 1 * 0.001 = 400 < 714 → ranges preserved + ChunkBlockAnalyser::Options Options; + Options.IsQuiet = true; + Options.HostHighSpeedLatencySec = 0.001; + Options.HostHighSpeedBytesPerSec = 400000; + ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize; + + std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}}; + std::vector<Mode> Modes = {Mode::MultiRangeHighSpeed}; + + auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes); + + CHECK(Result.FullBlockIndexes.empty()); + REQUIRE_EQ(2u, Result.BlockRanges.size()); + CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart); + CHECK_EQ(100u, Result.BlockRanges[0].RangeLength); + CHECK_EQ(ChunkStartOffset + 300u, 
Result.BlockRanges[1].RangeStart); + CHECK_EQ(300u, Result.BlockRanges[1].RangeLength); +} + +TEST_CASE("chunkblock.calc.all_chunks_needed_full_block") +{ + using namespace chunkblock_analyser_testutils; + using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode; + + LoggerRef LogRef = Log(); + std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef)); + + auto Block = MakeBlockDesc(50, {100, 200, 300, 400}); + ChunkBlockAnalyser::Options Options; + Options.IsQuiet = true; + Options.HostLatencySec = 0.001; + Options.HostSpeedBytesPerSec = 100000; + ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + // All 4 chunks needed → short-circuit to full block regardless of mode + std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 1, 2, 3}}}; + std::vector<Mode> Modes = {Mode::Exact}; + + auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes); + + REQUIRE_EQ(1u, Result.FullBlockIndexes.size()); + CHECK_EQ(0u, Result.FullBlockIndexes[0]); + CHECK(Result.BlockRanges.empty()); +} + +TEST_CASE("chunkblock.calc.headersize_zero_forces_full_block") +{ + using namespace chunkblock_analyser_testutils; + using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode; + + LoggerRef LogRef = Log(); + std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef)); + + // HeaderSize=0 → CanDoPartialBlockDownload=false → full block even in Exact mode + auto Block = MakeBlockDesc(0, {100, 200, 300, 400}); + ChunkBlockAnalyser::Options Options; + Options.IsQuiet = true; + ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}}; + std::vector<Mode> Modes = {Mode::Exact}; + + auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes); + + REQUIRE_EQ(1u, 
Result.FullBlockIndexes.size()); + CHECK_EQ(0u, Result.FullBlockIndexes[0]); + CHECK(Result.BlockRanges.empty()); +} + +TEST_CASE("chunkblock.calc.low_maxrangecountperrequest") +{ + using namespace chunkblock_analyser_testutils; + using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode; + + LoggerRef LogRef = Log(); + std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef)); + + // 5 chunks of 100 bytes each; need chunks 0, 2, 4 → 3 non-contiguous ranges + // With MaxRangeCountPerRequest=1 and high latency, cost model merges aggressively → full block + auto Block = MakeBlockDesc(10, {100, 100, 100, 100, 100}); + ChunkBlockAnalyser::Options Options; + Options.IsQuiet = true; + Options.HostLatencySec = 0.1; + Options.HostSpeedBytesPerSec = 1000; + Options.HostMaxRangeCountPerRequest = 1; + ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2, 4}}}; + std::vector<Mode> Modes = {Mode::MultiRange}; + + auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes); + + // Cost model drives merging: 3 requests × 1000 × 0.1 = 300 > slack ≈ 210+headersize + // After merges converges to full block + REQUIRE_EQ(1u, Result.FullBlockIndexes.size()); + CHECK_EQ(0u, Result.FullBlockIndexes[0]); + CHECK(Result.BlockRanges.empty()); +} + +TEST_CASE("chunkblock.calc.no_latency_skips_optimize") +{ + using namespace chunkblock_analyser_testutils; + using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode; + + LoggerRef LogRef = Log(); + std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef)); + + auto Block = MakeBlockDesc(50, {100, 200, 300, 400}); + // Default HostLatencySec=-1 → OptimizeRanges not called; raw GetBlockRanges result used + ChunkBlockAnalyser::Options Options; + Options.IsQuiet = true; + ChunkBlockAnalyser Analyser(*LogOutput, std::span<const 
ChunkBlockDescription>(&Block, 1), Options); + + uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize; + + std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}}; + std::vector<Mode> Modes = {Mode::MultiRange}; + + auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes); + + // No optimize pass → exact ranges from GetBlockRanges + CHECK(Result.FullBlockIndexes.empty()); + REQUIRE_EQ(2u, Result.BlockRanges.size()); + CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart); + CHECK_EQ(100u, Result.BlockRanges[0].RangeLength); + CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[1].RangeStart); + CHECK_EQ(300u, Result.BlockRanges[1].RangeLength); +} + +TEST_CASE("chunkblock.calc.multiple_blocks_different_modes") +{ + using namespace chunkblock_analyser_testutils; + using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode; + + LoggerRef LogRef = Log(); + std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef)); + + // 3 blocks with different modes: Off, Exact, MultiRange + auto Block0 = MakeBlockDesc(50, {100, 200, 300, 400}, /*BlockSeed=*/0); + auto Block1 = MakeBlockDesc(50, {100, 200, 300, 400}, /*BlockSeed=*/10); + auto Block2 = MakeBlockDesc(50, {100, 200, 300, 400}, /*BlockSeed=*/20); + + ChunkBlockAnalyser::Options Options; + Options.IsQuiet = true; + Options.HostLatencySec = 0.001; + Options.HostSpeedBytesPerSec = 100000; + + std::vector<ChunkBlockDescription> Blocks = {Block0, Block1, Block2}; + ChunkBlockAnalyser Analyser(*LogOutput, Blocks, Options); + + uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + 50; + + std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = { + {.BlockIndex = 0, .ChunkIndexes = {0, 2}}, + {.BlockIndex = 1, .ChunkIndexes = {0, 2}}, + {.BlockIndex = 2, .ChunkIndexes = {0, 2}}, + }; + std::vector<Mode> Modes = {Mode::Off, Mode::Exact, Mode::MultiRange}; + + auto 
Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes); + + // Block 0: Off → FullBlockIndexes + REQUIRE_EQ(1u, Result.FullBlockIndexes.size()); + CHECK_EQ(0u, Result.FullBlockIndexes[0]); + + // Block 1: Exact → 2 ranges; Block 2: MultiRange (low latency) → 2 ranges + // Total: 4 ranges + REQUIRE_EQ(4u, Result.BlockRanges.size()); + + // First 2 ranges belong to Block 1 (Exact) + CHECK_EQ(1u, Result.BlockRanges[0].BlockIndex); + CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart); + CHECK_EQ(100u, Result.BlockRanges[0].RangeLength); + CHECK_EQ(1u, Result.BlockRanges[1].BlockIndex); + CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[1].RangeStart); + CHECK_EQ(300u, Result.BlockRanges[1].RangeLength); + + // Last 2 ranges belong to Block 2 (MultiRange preserved) + CHECK_EQ(2u, Result.BlockRanges[2].BlockIndex); + CHECK_EQ(ChunkStartOffset, Result.BlockRanges[2].RangeStart); + CHECK_EQ(100u, Result.BlockRanges[2].RangeLength); + CHECK_EQ(2u, Result.BlockRanges[3].BlockIndex); + CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[3].RangeStart); + CHECK_EQ(300u, Result.BlockRanges[3].RangeLength); +} + +TEST_CASE("chunkblock.getblockranges.first_chunk_only") +{ + using namespace chunkblock_analyser_testutils; + + auto Block = MakeBlockDesc(50, {100, 200, 300, 400}); + uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize; + + std::vector<uint32_t> Needed = {0}; + auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed); + + REQUIRE_EQ(1u, Ranges.size()); + CHECK_EQ(ChunkStartOffset, Ranges[0].RangeStart); + CHECK_EQ(100u, Ranges[0].RangeLength); + CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart); + CHECK_EQ(1u, Ranges[0].ChunkBlockIndexCount); +} + +TEST_CASE("chunkblock.getblockranges.last_chunk_only") +{ + using namespace chunkblock_analyser_testutils; + + auto Block = MakeBlockDesc(50, {100, 200, 300, 400}); + uint64_t ChunkStartOffset = 
CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize; + + std::vector<uint32_t> Needed = {3}; + auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed); + + REQUIRE_EQ(1u, Ranges.size()); + CHECK_EQ(ChunkStartOffset + 600u, Ranges[0].RangeStart); // 100+200+300 before chunk 3 + CHECK_EQ(400u, Ranges[0].RangeLength); + CHECK_EQ(3u, Ranges[0].ChunkBlockIndexStart); + CHECK_EQ(1u, Ranges[0].ChunkBlockIndexCount); +} + +TEST_CASE("chunkblock.getblockranges.middle_chunk_only") +{ + using namespace chunkblock_analyser_testutils; + + auto Block = MakeBlockDesc(50, {100, 200, 300, 400}); + uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize; + + std::vector<uint32_t> Needed = {1}; + auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed); + + REQUIRE_EQ(1u, Ranges.size()); + CHECK_EQ(ChunkStartOffset + 100u, Ranges[0].RangeStart); // 100 before chunk 1 + CHECK_EQ(200u, Ranges[0].RangeLength); + CHECK_EQ(1u, Ranges[0].ChunkBlockIndexStart); + CHECK_EQ(1u, Ranges[0].ChunkBlockIndexCount); +} + +TEST_CASE("chunkblock.getblockranges.all_chunks") +{ + using namespace chunkblock_analyser_testutils; + + auto Block = MakeBlockDesc(50, {100, 200, 300, 400}); + uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize; + + std::vector<uint32_t> Needed = {0, 1, 2, 3}; + auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed); + + REQUIRE_EQ(1u, Ranges.size()); + CHECK_EQ(ChunkStartOffset, Ranges[0].RangeStart); + CHECK_EQ(1000u, Ranges[0].RangeLength); // 100+200+300+400 + CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart); + CHECK_EQ(4u, Ranges[0].ChunkBlockIndexCount); +} + +TEST_CASE("chunkblock.getblockranges.non_contiguous") +{ + using namespace chunkblock_analyser_testutils; + + // Chunks 0 and 2 needed, chunk 1 skipped → two separate ranges + auto Block = MakeBlockDesc(50, {100, 200, 300}); + uint64_t ChunkStartOffset = 
CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize; + + std::vector<uint32_t> Needed = {0, 2}; + auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed); + + REQUIRE_EQ(2u, Ranges.size()); + + CHECK_EQ(ChunkStartOffset, Ranges[0].RangeStart); + CHECK_EQ(100u, Ranges[0].RangeLength); + CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart); + CHECK_EQ(1u, Ranges[0].ChunkBlockIndexCount); + + CHECK_EQ(ChunkStartOffset + 300u, Ranges[1].RangeStart); // 100+200 before chunk 2 + CHECK_EQ(300u, Ranges[1].RangeLength); + CHECK_EQ(2u, Ranges[1].ChunkBlockIndexStart); + CHECK_EQ(1u, Ranges[1].ChunkBlockIndexCount); +} + +TEST_CASE("chunkblock.getblockranges.contiguous_run") +{ + using namespace chunkblock_analyser_testutils; + + // Chunks 1, 2, 3 needed (consecutive) → one merged range + auto Block = MakeBlockDesc(50, {50, 100, 150, 200, 250}); + uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize; + + std::vector<uint32_t> Needed = {1, 2, 3}; + auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed); + + REQUIRE_EQ(1u, Ranges.size()); + CHECK_EQ(ChunkStartOffset + 50u, Ranges[0].RangeStart); // 50 before chunk 1 + CHECK_EQ(450u, Ranges[0].RangeLength); // 100+150+200 + CHECK_EQ(1u, Ranges[0].ChunkBlockIndexStart); + CHECK_EQ(3u, Ranges[0].ChunkBlockIndexCount); +} + +TEST_SUITE_END(); + void chunkblock_forcelink() { diff --git a/src/zenremotestore/chunking/chunkedcontent.cpp b/src/zenremotestore/chunking/chunkedcontent.cpp index 26d179f14..c09ab9d3a 100644 --- a/src/zenremotestore/chunking/chunkedcontent.cpp +++ b/src/zenremotestore/chunking/chunkedcontent.cpp @@ -166,7 +166,6 @@ namespace { if (Chunked.Info.ChunkSequence.empty()) { AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Chunked.Info.RawHash, RawSize); - Stats.UniqueSequencesFound++; } else { @@ -186,7 +185,6 @@ namespace { Chunked.Info.ChunkHashes, ChunkSizes); } - 
Stats.UniqueSequencesFound++; } }); Stats.FilesChunked++; @@ -253,7 +251,7 @@ FolderContent::operator==(const FolderContent& Rhs) const if ((Platform == Rhs.Platform) && (RawSizes == Rhs.RawSizes) && (Attributes == Rhs.Attributes) && (ModificationTicks == Rhs.ModificationTicks) && (Paths.size() == Rhs.Paths.size())) { - size_t PathCount = 0; + size_t PathCount = Paths.size(); for (size_t PathIndex = 0; PathIndex < PathCount; PathIndex++) { if (Paths[PathIndex].generic_string() != Rhs.Paths[PathIndex].generic_string()) @@ -1706,6 +1704,8 @@ namespace chunkedcontent_testutils { } // namespace chunkedcontent_testutils +TEST_SUITE_BEGIN("remotestore.chunkedcontent"); + TEST_CASE("chunkedcontent.DeletePathsFromContent") { FastRandom BaseRandom; @@ -1924,6 +1924,8 @@ TEST_CASE("chunkedcontent.ApplyChunkedContentOverlay") } } +TEST_SUITE_END(); + #endif // ZEN_WITH_TESTS } // namespace zen diff --git a/src/zenremotestore/chunking/chunkedfile.cpp b/src/zenremotestore/chunking/chunkedfile.cpp index 652110605..633ddfd0d 100644 --- a/src/zenremotestore/chunking/chunkedfile.cpp +++ b/src/zenremotestore/chunking/chunkedfile.cpp @@ -211,6 +211,8 @@ ZEN_THIRD_PARTY_INCLUDES_END namespace zen { # if 0 +TEST_SUITE_BEGIN("remotestore.chunkedfile"); + TEST_CASE("chunkedfile.findparams") { # if 1 @@ -513,6 +515,8 @@ TEST_CASE("chunkedfile.findparams") // WorkLatch.CountDown(); // WorkLatch.Wait(); } + +TEST_SUITE_END(); # endif // 0 void diff --git a/src/zenremotestore/chunking/chunkingcache.cpp b/src/zenremotestore/chunking/chunkingcache.cpp index 7f0a26330..e9b783a00 100644 --- a/src/zenremotestore/chunking/chunkingcache.cpp +++ b/src/zenremotestore/chunking/chunkingcache.cpp @@ -75,13 +75,13 @@ public: { Lock.ReleaseNow(); RwLock::ExclusiveLockScope EditLock(m_Lock); - if (auto RemoveIt = m_PathHashToEntry.find(PathHash); It != m_PathHashToEntry.end()) + if (auto RemoveIt = m_PathHashToEntry.find(PathHash); RemoveIt != m_PathHashToEntry.end()) { - CachedEntry& DeleteEntry = 
m_Entries[It->second]; + CachedEntry& DeleteEntry = m_Entries[RemoveIt->second]; DeleteEntry.Chunked = {}; DeleteEntry.ModificationTick = 0; - m_FreeEntryIndexes.push_back(It->second); - m_PathHashToEntry.erase(It); + m_FreeEntryIndexes.push_back(RemoveIt->second); + m_PathHashToEntry.erase(RemoveIt); } } } @@ -461,6 +461,8 @@ namespace chunkingcache_testutils { } } // namespace chunkingcache_testutils +TEST_SUITE_BEGIN("remotestore.chunkingcache"); + TEST_CASE("chunkingcache.nullchunkingcache") { using namespace chunkingcache_testutils; @@ -617,6 +619,8 @@ TEST_CASE("chunkingcache.diskchunkingcache") } } +TEST_SUITE_END(); + void chunkingcache_forcelink() { diff --git a/src/zenremotestore/filesystemutils.cpp b/src/zenremotestore/filesystemutils.cpp index fa1ce6f78..fdb2143d8 100644 --- a/src/zenremotestore/filesystemutils.cpp +++ b/src/zenremotestore/filesystemutils.cpp @@ -637,6 +637,8 @@ namespace { void GenerateFile(const std::filesystem::path& Path) { BasicFile _(Path, BasicFile::Mode::kTruncate); } } // namespace +TEST_SUITE_BEGIN("remotestore.filesystemutils"); + TEST_CASE("filesystemutils.CleanDirectory") { ScopedTemporaryDirectory TmpDir; @@ -692,6 +694,8 @@ TEST_CASE("filesystemutils.CleanDirectory") CHECK(!IsFile(TmpDir.Path() / "CantDeleteMe2" / "deleteme")); } +TEST_SUITE_END(); + #endif } // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstorage.h b/src/zenremotestore/include/zenremotestore/builds/buildstorage.h index 85dabc59f..da8437a58 100644 --- a/src/zenremotestore/include/zenremotestore/builds/buildstorage.h +++ b/src/zenremotestore/include/zenremotestore/builds/buildstorage.h @@ -53,15 +53,24 @@ public: std::function<IoBuffer(uint64_t Offset, uint64_t Size)>&& Transmitter, std::function<void(uint64_t, bool)>&& OnSentBytes) = 0; - virtual IoBuffer GetBuildBlob(const Oid& BuildId, - const IoHash& RawHash, - uint64_t RangeOffset = 0, - uint64_t RangeBytes = (uint64_t)-1) = 0; + virtual IoBuffer GetBuildBlob(const 
Oid& BuildId, + const IoHash& RawHash, + uint64_t RangeOffset = 0, + uint64_t RangeBytes = (uint64_t)-1) = 0; + + struct BuildBlobRanges + { + IoBuffer PayloadBuffer; + std::vector<std::pair<uint64_t, uint64_t>> Ranges; + }; + virtual BuildBlobRanges GetBuildBlobRanges(const Oid& BuildId, + const IoHash& RawHash, + std::span<const std::pair<uint64_t, uint64_t>> Ranges) = 0; virtual std::vector<std::function<void()>> GetLargeBuildBlob(const Oid& BuildId, const IoHash& RawHash, uint64_t ChunkSize, std::function<void(uint64_t Offset, const IoBuffer& Chunk)>&& OnReceive, - std::function<void()>&& OnComplete) = 0; + std::function<void()>&& OnComplete) = 0; [[nodiscard]] virtual bool PutBlockMetadata(const Oid& BuildId, const IoHash& BlockRawHash, const CbObject& MetaData) = 0; virtual CbObject FindBlocks(const Oid& BuildId, uint64_t MaxBlockCount) = 0; diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstoragecache.h b/src/zenremotestore/include/zenremotestore/builds/buildstoragecache.h index bb5b1c5f4..24702df0f 100644 --- a/src/zenremotestore/include/zenremotestore/builds/buildstoragecache.h +++ b/src/zenremotestore/include/zenremotestore/builds/buildstoragecache.h @@ -37,6 +37,14 @@ public: const IoHash& RawHash, uint64_t RangeOffset = 0, uint64_t RangeBytes = (uint64_t)-1) = 0; + struct BuildBlobRanges + { + IoBuffer PayloadBuffer; + std::vector<std::pair<uint64_t, uint64_t>> Ranges; + }; + virtual BuildBlobRanges GetBuildBlobRanges(const Oid& BuildId, + const IoHash& RawHash, + std::span<const std::pair<uint64_t, uint64_t>> Ranges) = 0; virtual void PutBlobMetadatas(const Oid& BuildId, std::span<const IoHash> BlobHashes, std::span<const CbObject> MetaDatas) = 0; virtual std::vector<CbObject> GetBlobMetadatas(const Oid& BuildId, std::span<const IoHash> BlobHashes) = 0; @@ -61,10 +69,19 @@ std::unique_ptr<BuildStorageCache> CreateZenBuildStorageCache(HttpClient& H const std::filesystem::path& TempFolderPath, WorkerThreadPool& BackgroundWorkerPool); 
+#if ZEN_WITH_TESTS +std::unique_ptr<BuildStorageCache> CreateInMemoryBuildStorageCache(uint64_t MaxRangeSupported, + BuildStorageCache::Statistics& Stats, + double LatencySec = 0.0, + double DelayPerKBSec = 0.0); +#endif // ZEN_WITH_TESTS + struct ZenCacheEndpointTestResult { bool Success = false; std::string FailureReason; + double LatencySeconds = -1.0; + uint64_t MaxRangeCountPerRequest = 1; }; ZenCacheEndpointTestResult TestZenCacheEndpoint(std::string_view BaseUrl, const bool AssumeHttp2, const bool HttpVerbose); diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h b/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h index 6304159ae..0d2eded58 100644 --- a/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h +++ b/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h @@ -7,7 +7,9 @@ #include <zencore/uid.h> #include <zencore/zencore.h> #include <zenremotestore/builds/buildstoragecache.h> +#include <zenremotestore/chunking/chunkblock.h> #include <zenremotestore/chunking/chunkedcontent.h> +#include <zenremotestore/partialblockrequestmode.h> #include <zenutil/bufferedwritefilecache.h> #include <atomic> @@ -108,17 +110,6 @@ struct RebuildFolderStateStatistics uint64_t FinalizeTreeElapsedWallTimeUs = 0; }; -enum EPartialBlockRequestMode -{ - Off, - ZenCacheOnly, - Mixed, - All, - Invalid -}; - -EPartialBlockRequestMode PartialBlockRequestModeFromString(const std::string_view ModeString); - std::filesystem::path ZenStateFilePath(const std::filesystem::path& ZenFolderPath); std::filesystem::path ZenTempFolderPath(const std::filesystem::path& ZenFolderPath); @@ -170,7 +161,7 @@ public: DownloadStatistics m_DownloadStats; WriteChunkStatistics m_WriteChunkStats; RebuildFolderStateStatistics m_RebuildFolderStateStats; - std::atomic<uint64_t> m_WrittenChunkByteCount; + std::atomic<uint64_t> m_WrittenChunkByteCount = 0; private: struct BlockWriteOps @@ -195,7 +186,7 @@ 
private: uint32_t ScavengedContentIndex = (uint32_t)-1; uint32_t ScavengedPathIndex = (uint32_t)-1; uint32_t RemoteSequenceIndex = (uint32_t)-1; - uint64_t RawSize = (uint32_t)-1; + uint64_t RawSize = (uint64_t)-1; }; struct CopyChunkData @@ -218,33 +209,6 @@ private: uint64_t ElapsedTimeMs = 0; }; - struct BlockRangeDescriptor - { - uint32_t BlockIndex = (uint32_t)-1; - uint64_t RangeStart = 0; - uint64_t RangeLength = 0; - uint32_t ChunkBlockIndexStart = 0; - uint32_t ChunkBlockIndexCount = 0; - }; - - struct BlockRangeLimit - { - uint16_t SizePercent; - uint16_t MaxRangeCount; - }; - - static constexpr uint16_t FullBlockRangePercentLimit = 95; - - static constexpr BuildsOperationUpdateFolder::BlockRangeLimit ForceMergeLimits[] = { - {.SizePercent = FullBlockRangePercentLimit, .MaxRangeCount = 1}, - {.SizePercent = 90, .MaxRangeCount = 2}, - {.SizePercent = 85, .MaxRangeCount = 8}, - {.SizePercent = 80, .MaxRangeCount = 16}, - {.SizePercent = 70, .MaxRangeCount = 32}, - {.SizePercent = 60, .MaxRangeCount = 48}, - {.SizePercent = 2, .MaxRangeCount = 56}, - {.SizePercent = 0, .MaxRangeCount = 64}}; - void ScanCacheFolder(tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedChunkHashesFound, tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedSequenceHashesFound); void ScanTempBlocksFolder(tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedBlocksFound); @@ -299,25 +263,14 @@ private: ParallelWork& Work, std::function<void(IoBuffer&& Payload)>&& OnDownloaded); - BlockRangeDescriptor MergeBlockRanges(std::span<const BlockRangeDescriptor> Ranges); - std::optional<std::vector<BlockRangeDescriptor>> MakeOptionalBlockRangeVector(uint64_t TotalBlockSize, - const BlockRangeDescriptor& Range); - const BlockRangeLimit* GetBlockRangeLimitForRange(std::span<const BlockRangeLimit> Limits, - uint64_t TotalBlockSize, - std::span<const BlockRangeDescriptor> Ranges); - std::vector<BlockRangeDescriptor> CollapseBlockRanges(const uint64_t AlwaysAcceptableGap, - 
std::span<const BlockRangeDescriptor> BlockRanges); - uint64_t CalculateNextGap(std::span<const BlockRangeDescriptor> BlockRanges); - std::optional<std::vector<BlockRangeDescriptor>> CalculateBlockRanges(uint32_t BlockIndex, - const ChunkBlockDescription& BlockDescription, - std::span<const uint32_t> BlockChunkIndexNeeded, - bool LimitToSingleRange, - const uint64_t ChunkStartOffsetInBlock, - const uint64_t TotalBlockSize, - uint64_t& OutTotalWantedChunksSize); - void DownloadPartialBlock(const BlockRangeDescriptor BlockRange, - const BlobsExistsResult& ExistsResult, - std::function<void(IoBuffer&& InMemoryBuffer, const std::filesystem::path& OnDiskPath)>&& OnDownloaded); + void DownloadPartialBlock(std::span<const ChunkBlockAnalyser::BlockRangeDescriptor> BlockRanges, + size_t BlockRangeIndex, + size_t BlockRangeCount, + const BlobsExistsResult& ExistsResult, + std::function<void(IoBuffer&& InMemoryBuffer, + const std::filesystem::path& OnDiskPath, + size_t BlockRangeStartIndex, + std::span<const std::pair<uint64_t, uint64_t>> OffsetAndLengths)>&& OnDownloaded); std::vector<uint32_t> WriteLocalChunkToCache(CloneQueryInterface* CloneQuery, const CopyChunkData& CopyData, @@ -339,7 +292,8 @@ private: const uint64_t FileOffset, const uint32_t PathIndex); - bool GetBlockWriteOps(std::span<const IoHash> ChunkRawHashes, + bool GetBlockWriteOps(const IoHash& BlockRawHash, + std::span<const IoHash> ChunkRawHashes, std::span<const uint32_t> ChunkCompressedLengths, std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags, @@ -408,7 +362,7 @@ private: const std::filesystem::path m_TempDownloadFolderPath; const std::filesystem::path m_TempBlockFolderPath; - std::atomic<uint64_t> m_ValidatedChunkByteCount; + std::atomic<uint64_t> m_ValidatedChunkByteCount = 0; }; struct FindBlocksStatistics diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstorageutil.h 
b/src/zenremotestore/include/zenremotestore/builds/buildstorageutil.h index ab3037c89..7306188ca 100644 --- a/src/zenremotestore/include/zenremotestore/builds/buildstorageutil.h +++ b/src/zenremotestore/include/zenremotestore/builds/buildstorageutil.h @@ -14,13 +14,20 @@ class BuildStorageCache; struct BuildStorageResolveResult { - std::string HostUrl; - std::string HostName; - bool HostAssumeHttp2 = false; - - std::string CacheUrl; - std::string CacheName; - bool CacheAssumeHttp2 = false; + struct Capabilities + { + uint64_t MaxRangeCountPerRequest = 1; + }; + struct Host + { + std::string Address; + std::string Name; + bool AssumeHttp2 = false; + double LatencySec = -1.0; + Capabilities Caps; + }; + Host Cloud; + Host Cache; }; enum class ZenCacheResolveMode @@ -43,7 +50,6 @@ std::vector<ChunkBlockDescription> GetBlockDescriptions(OperationLogOutput& Out BuildStorageBase& Storage, BuildStorageCache* OptionalCacheStorage, const Oid& BuildId, - const Oid& BuildPartId, std::span<const IoHash> BlockRawHashes, bool AttemptFallback, bool IsQuiet, @@ -51,12 +57,13 @@ std::vector<ChunkBlockDescription> GetBlockDescriptions(OperationLogOutput& Out struct StorageInstance { - std::unique_ptr<HttpClient> BuildStorageHttp; - std::unique_ptr<BuildStorageBase> BuildStorage; - std::string StorageName; + BuildStorageResolveResult::Host BuildStorageHost; + std::unique_ptr<HttpClient> BuildStorageHttp; + std::unique_ptr<BuildStorageBase> BuildStorage; + + BuildStorageResolveResult::Host CacheHost; std::unique_ptr<HttpClient> CacheHttp; - std::unique_ptr<BuildStorageCache> BuildCacheStorage; - std::string CacheName; + std::unique_ptr<BuildStorageCache> CacheStorage; }; } // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkblock.h b/src/zenremotestore/include/zenremotestore/chunking/chunkblock.h index d339b0f94..931bb2097 100644 --- a/src/zenremotestore/include/zenremotestore/chunking/chunkblock.h +++ 
b/src/zenremotestore/include/zenremotestore/chunking/chunkblock.h @@ -7,8 +7,9 @@ #include <zencore/compactbinary.h> #include <zencore/compress.h> -#include <optional> -#include <vector> +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_map.h> +ZEN_THIRD_PARTY_INCLUDES_END namespace zen { @@ -20,13 +21,14 @@ struct ThinChunkBlockDescription struct ChunkBlockDescription : public ThinChunkBlockDescription { - uint64_t HeaderSize; + uint64_t HeaderSize = 0; std::vector<uint32_t> ChunkRawLengths; std::vector<uint32_t> ChunkCompressedLengths; }; std::vector<ChunkBlockDescription> ParseChunkBlockDescriptionList(const CbObjectView& BlocksObject); ChunkBlockDescription ParseChunkBlockDescription(const CbObjectView& BlockObject); +std::vector<ChunkBlockDescription> ParseBlockMetadatas(std::span<const CbObject> BlockMetadatas); CbObject BuildChunkBlockDescription(const ChunkBlockDescription& Block, CbObjectView MetaData); ChunkBlockDescription GetChunkBlockDescription(const SharedBuffer& BlockPayload, const IoHash& RawHash); typedef std::function<std::pair<uint64_t, CompressedBuffer>(const IoHash& RawHash)> FetchChunkFunc; @@ -73,6 +75,70 @@ std::vector<size_t> FindReuseBlocks(OperationLogOutput& Output, std::span<const uint32_t> ChunkIndexes, std::vector<uint32_t>& OutUnusedChunkIndexes); +class ChunkBlockAnalyser +{ +public: + struct Options + { + bool IsQuiet = false; + bool IsVerbose = false; + double HostLatencySec = -1.0; + double HostHighSpeedLatencySec = -1.0; + uint64_t HostSpeedBytesPerSec = (1u * 1024u * 1024u * 1024u) / 8u; // 1GBit + uint64_t HostHighSpeedBytesPerSec = (2u * 1024u * 1024u * 1024u) / 8u; // 2GBit + uint64_t HostMaxRangeCountPerRequest = (uint64_t)-1; + uint64_t HostHighSpeedMaxRangeCountPerRequest = (uint64_t)-1; // No limit + uint64_t MaxRangesPerBlock = 1024u; + }; + + ChunkBlockAnalyser(OperationLogOutput& LogOutput, std::span<const ChunkBlockDescription> BlockDescriptions, const Options& Options); + + struct BlockRangeDescriptor + { + 
uint32_t BlockIndex = (uint32_t)-1; + uint64_t RangeStart = 0; + uint64_t RangeLength = 0; + uint32_t ChunkBlockIndexStart = 0; + uint32_t ChunkBlockIndexCount = 0; + }; + + struct NeededBlock + { + uint32_t BlockIndex; + std::vector<uint32_t> ChunkIndexes; + }; + + std::vector<NeededBlock> GetNeeded(const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex, + std::function<bool(uint32_t ChunkIndex)>&& NeedsBlockChunk); + + enum class EPartialBlockDownloadMode + { + Off, + SingleRange, + MultiRange, + MultiRangeHighSpeed, + Exact + }; + + struct BlockResult + { + std::vector<BlockRangeDescriptor> BlockRanges; + std::vector<uint32_t> FullBlockIndexes; + }; + + BlockResult CalculatePartialBlockDownloads(std::span<const NeededBlock> NeededBlocks, + std::span<const EPartialBlockDownloadMode> BlockPartialDownloadModes); + +private: + OperationLogOutput& m_LogOutput; + const std::span<const ChunkBlockDescription> m_BlockDescriptions; + const Options m_Options; +}; + +#if ZEN_WITH_TESTS + void chunkblock_forcelink(); +#endif // ZEN_WITH_TESTS + } // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h b/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h index d402bd3f0..f44381e42 100644 --- a/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h +++ b/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h @@ -231,7 +231,7 @@ GetSequenceIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& Raw inline uint32_t GetChunkIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& RawHash) { - return Lookup.RawHashToSequenceIndex.at(RawHash); + return Lookup.ChunkHashToChunkIndex.at(RawHash); } inline uint32_t diff --git a/src/zenremotestore/include/zenremotestore/jupiter/jupiterhost.h b/src/zenremotestore/include/zenremotestore/jupiter/jupiterhost.h index 432496bc1..caf7ecd28 100644 --- a/src/zenremotestore/include/zenremotestore/jupiter/jupiterhost.h +++ 
b/src/zenremotestore/include/zenremotestore/jupiter/jupiterhost.h @@ -2,6 +2,7 @@ #pragma once +#include <cstdint> #include <string> #include <string_view> #include <vector> @@ -28,6 +29,8 @@ struct JupiterEndpointTestResult { bool Success = false; std::string FailureReason; + double LatencySeconds = -1.0; + uint64_t MaxRangeCountPerRequest = 1; }; JupiterEndpointTestResult TestJupiterEndpoint(std::string_view BaseUrl, const bool AssumeHttp2, const bool HttpVerbose); diff --git a/src/zenremotestore/include/zenremotestore/jupiter/jupitersession.h b/src/zenremotestore/include/zenremotestore/jupiter/jupitersession.h index eaf6962fd..8721bc37f 100644 --- a/src/zenremotestore/include/zenremotestore/jupiter/jupitersession.h +++ b/src/zenremotestore/include/zenremotestore/jupiter/jupitersession.h @@ -56,6 +56,11 @@ struct FinalizeBuildPartResult : JupiterResult std::vector<IoHash> Needs; }; +struct BuildBlobRangesResult : JupiterResult +{ + std::vector<std::pair<uint64_t, uint64_t>> Ranges; +}; + /** * Context for performing Jupiter operations * @@ -135,6 +140,13 @@ public: uint64_t Offset = 0, uint64_t Size = (uint64_t)-1); + BuildBlobRangesResult GetBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const IoHash& Hash, + std::filesystem::path TempFolderPath, + std::span<const std::pair<uint64_t, uint64_t>> Ranges); + JupiterResult PutMultipartBuildBlob(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, diff --git a/src/zenremotestore/include/zenremotestore/operationlogoutput.h b/src/zenremotestore/include/zenremotestore/operationlogoutput.h index 9693e69cf..32b95f50f 100644 --- a/src/zenremotestore/include/zenremotestore/operationlogoutput.h +++ b/src/zenremotestore/include/zenremotestore/operationlogoutput.h @@ -3,6 +3,7 @@ #pragma once #include <zencore/fmtutils.h> +#include <zencore/logbase.h> namespace zen { @@ -10,7 +11,7 @@ class OperationLogOutput { public: virtual ~OperationLogOutput() {} - virtual 
void EmitLogMessage(int LogLevel, std::string_view Format, fmt::format_args Args) = 0; + virtual void EmitLogMessage(const logging::LogPoint& Point, fmt::format_args Args) = 0; virtual void SetLogOperationName(std::string_view Name) = 0; virtual void SetLogOperationProgress(uint32_t StepIndex, uint32_t StepCount) = 0; @@ -57,23 +58,19 @@ public: virtual ProgressBar* CreateProgressBar(std::string_view InSubTask) = 0; }; -struct LoggerRef; +OperationLogOutput* CreateStandardLogOutput(LoggerRef Log); -OperationLogOutput* CreateStandardLogOutput(LoggerRef& Log); - -#define ZEN_OPERATION_LOG(OutputTarget, InLevel, fmtstr, ...) \ - do \ - { \ - using namespace std::literals; \ - ZEN_CHECK_FORMAT_STRING(fmtstr##sv, ##__VA_ARGS__); \ - OutputTarget.EmitLogMessage(InLevel, fmtstr, zen::logging::LogCaptureArguments(__VA_ARGS__)); \ +#define ZEN_OPERATION_LOG(OutputTarget, InLevel, fmtstr, ...) \ + do \ + { \ + using namespace std::literals; \ + static constinit zen::logging::LogPoint LogPoint{{}, InLevel, std::string_view(fmtstr)}; \ + ZEN_CHECK_FORMAT_STRING(fmtstr##sv, ##__VA_ARGS__); \ + (OutputTarget).EmitLogMessage(LogPoint, zen::logging::LogCaptureArguments(__VA_ARGS__)); \ } while (false) -#define ZEN_OPERATION_LOG_INFO(OutputTarget, fmtstr, ...) \ - ZEN_OPERATION_LOG((OutputTarget), zen::logging::level::Info, fmtstr, ##__VA_ARGS__) -#define ZEN_OPERATION_LOG_DEBUG(OutputTarget, fmtstr, ...) \ - ZEN_OPERATION_LOG((OutputTarget), zen::logging::level::Debug, fmtstr, ##__VA_ARGS__) -#define ZEN_OPERATION_LOG_WARN(OutputTarget, fmtstr, ...) \ - ZEN_OPERATION_LOG((OutputTarget), zen::logging::level::Warn, fmtstr, ##__VA_ARGS__) +#define ZEN_OPERATION_LOG_INFO(OutputTarget, fmtstr, ...) ZEN_OPERATION_LOG(OutputTarget, zen::logging::Info, fmtstr, ##__VA_ARGS__) +#define ZEN_OPERATION_LOG_DEBUG(OutputTarget, fmtstr, ...) ZEN_OPERATION_LOG(OutputTarget, zen::logging::Debug, fmtstr, ##__VA_ARGS__) +#define ZEN_OPERATION_LOG_WARN(OutputTarget, fmtstr, ...) 
ZEN_OPERATION_LOG(OutputTarget, zen::logging::Warn, fmtstr, ##__VA_ARGS__) } // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/partialblockrequestmode.h b/src/zenremotestore/include/zenremotestore/partialblockrequestmode.h new file mode 100644 index 000000000..54adea2b2 --- /dev/null +++ b/src/zenremotestore/include/zenremotestore/partialblockrequestmode.h @@ -0,0 +1,20 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <string_view> + +namespace zen { + +enum EPartialBlockRequestMode +{ + Off, + ZenCacheOnly, + Mixed, + All, + Invalid +}; + +EPartialBlockRequestMode PartialBlockRequestModeFromString(const std::string_view ModeString); + +} // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/projectstore/buildsremoteprojectstore.h b/src/zenremotestore/include/zenremotestore/projectstore/buildsremoteprojectstore.h index e8b7c15c0..c058e1c1f 100644 --- a/src/zenremotestore/include/zenremotestore/projectstore/buildsremoteprojectstore.h +++ b/src/zenremotestore/include/zenremotestore/projectstore/buildsremoteprojectstore.h @@ -2,6 +2,7 @@ #pragma once +#include <zenhttp/httpclient.h> #include <zenremotestore/projectstore/remoteprojectstore.h> namespace zen { @@ -10,9 +11,6 @@ class AuthMgr; struct BuildsRemoteStoreOptions : RemoteStoreOptions { - std::string Host; - std::string OverrideHost; - std::string ZenHost; std::string Namespace; std::string Bucket; Oid BuildId; @@ -22,18 +20,16 @@ struct BuildsRemoteStoreOptions : RemoteStoreOptions std::filesystem::path OidcExePath; bool ForceDisableBlocks = false; bool ForceDisableTempBlocks = false; - bool AssumeHttp2 = false; - bool PopulateCache = true; IoBuffer MetaData; size_t MaximumInMemoryDownloadSize = 1024u * 1024u; }; -std::shared_ptr<RemoteProjectStore> CreateJupiterBuildsRemoteStore(LoggerRef InLog, - const BuildsRemoteStoreOptions& Options, - const std::filesystem::path& TempFilePath, - bool Quiet, - bool Unattended, - bool Hidden, - 
WorkerThreadPool& CacheBackgroundWorkerPool); +struct BuildStorageResolveResult; + +std::shared_ptr<RemoteProjectStore> CreateJupiterBuildsRemoteStore(LoggerRef InLog, + const BuildStorageResolveResult& ResolveResult, + std::function<HttpClientAccessToken()>&& TokenProvider, + const BuildsRemoteStoreOptions& Options, + const std::filesystem::path& TempFilePath); } // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/projectstore/remoteprojectstore.h b/src/zenremotestore/include/zenremotestore/projectstore/remoteprojectstore.h index 008f94351..084d975a2 100644 --- a/src/zenremotestore/include/zenremotestore/projectstore/remoteprojectstore.h +++ b/src/zenremotestore/include/zenremotestore/projectstore/remoteprojectstore.h @@ -5,7 +5,9 @@ #include <zencore/jobqueue.h> #include <zenstore/projectstore.h> +#include <zenremotestore/builds/buildstoragecache.h> #include <zenremotestore/chunking/chunkblock.h> +#include <zenremotestore/partialblockrequestmode.h> #include <unordered_set> @@ -73,24 +75,35 @@ public: std::vector<ChunkBlockDescription> Blocks; }; + struct GetBlockDescriptionsResult : public Result + { + std::vector<ChunkBlockDescription> Blocks; + }; + + struct LoadAttachmentRangesResult : public Result + { + IoBuffer Bytes; + std::vector<std::pair<uint64_t, uint64_t>> Ranges; + }; + struct RemoteStoreInfo { - bool CreateBlocks; - bool UseTempBlockFiles; - bool AllowChunking; + bool CreateBlocks = false; + bool UseTempBlockFiles = false; + bool AllowChunking = false; std::string ContainerName; std::string Description; }; struct Stats { - std::uint64_t m_SentBytes; - std::uint64_t m_ReceivedBytes; - std::uint64_t m_RequestTimeNS; - std::uint64_t m_RequestCount; - std::uint64_t m_PeakSentBytes; - std::uint64_t m_PeakReceivedBytes; - std::uint64_t m_PeakBytesPerSec; + std::uint64_t m_SentBytes = 0; + std::uint64_t m_ReceivedBytes = 0; + std::uint64_t m_RequestTimeNS = 0; + std::uint64_t m_RequestCount = 0; + std::uint64_t m_PeakSentBytes = 0; + 
std::uint64_t m_PeakReceivedBytes = 0; + std::uint64_t m_PeakBytesPerSec = 0; }; struct ExtendedStats @@ -111,12 +124,17 @@ public: virtual FinalizeResult FinalizeContainer(const IoHash& RawHash) = 0; virtual SaveAttachmentsResult SaveAttachments(const std::vector<SharedBuffer>& Payloads) = 0; - virtual LoadContainerResult LoadContainer() = 0; - virtual GetKnownBlocksResult GetKnownBlocks() = 0; - virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash) = 0; - virtual LoadAttachmentsResult LoadAttachments(const std::vector<IoHash>& RawHashes) = 0; + virtual LoadContainerResult LoadContainer() = 0; + virtual GetKnownBlocksResult GetKnownBlocks() = 0; + virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes, + BuildStorageCache* OptionalCache, + const Oid& CacheBuildId) = 0; + + virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash) = 0; - virtual void Flush() = 0; + virtual LoadAttachmentRangesResult LoadAttachmentRanges(const IoHash& RawHash, + std::span<const std::pair<uint64_t, uint64_t>> Ranges) = 0; + virtual LoadAttachmentsResult LoadAttachments(const std::vector<IoHash>& RawHashes) = 0; }; struct RemoteStoreOptions @@ -153,14 +171,15 @@ RemoteProjectStore::LoadContainerResult BuildContainer( class JobContext; -RemoteProjectStore::Result SaveOplogContainer(ProjectStore::Oplog& Oplog, - const CbObject& ContainerObject, - const std::function<void(std::span<IoHash> RawHashes)>& OnReferencedAttachments, - const std::function<bool(const IoHash& RawHash)>& HasAttachment, - const std::function<void(const IoHash& BlockHash, std::vector<IoHash>&& Chunks)>& OnNeedBlock, - const std::function<void(const IoHash& RawHash)>& OnNeedAttachment, - const std::function<void(const ChunkedInfo& Chunked)>& OnChunkedAttachment, - JobContext* OptionalContext); +RemoteProjectStore::Result SaveOplogContainer( + ProjectStore::Oplog& Oplog, + const CbObject& ContainerObject, + const std::function<void(std::span<IoHash> 
RawHashes)>& OnReferencedAttachments, + const std::function<bool(const IoHash& RawHash)>& HasAttachment, + const std::function<void(ThinChunkBlockDescription&& ThinBlockDescription, std::vector<uint32_t>&& NeededChunkIndexes)>& OnNeedBlock, + const std::function<void(const IoHash& RawHash)>& OnNeedAttachment, + const std::function<void(const ChunkedInfo& Chunked)>& OnChunkedAttachment, + JobContext* OptionalContext); RemoteProjectStore::Result SaveOplog(CidStore& ChunkStore, RemoteProjectStore& RemoteStore, @@ -177,15 +196,29 @@ RemoteProjectStore::Result SaveOplog(CidStore& ChunkStore, bool IgnoreMissingAttachments, JobContext* OptionalContext); -RemoteProjectStore::Result LoadOplog(CidStore& ChunkStore, - RemoteProjectStore& RemoteStore, - ProjectStore::Oplog& Oplog, - WorkerThreadPool& NetworkWorkerPool, - WorkerThreadPool& WorkerPool, - bool ForceDownload, - bool IgnoreMissingAttachments, - bool CleanOplog, - JobContext* OptionalContext); +struct LoadOplogContext +{ + CidStore& ChunkStore; + RemoteProjectStore& RemoteStore; + BuildStorageCache* OptionalCache = nullptr; + Oid CacheBuildId = Oid::Zero; + BuildStorageCache::Statistics* OptionalCacheStats = nullptr; + ProjectStore::Oplog& Oplog; + WorkerThreadPool& NetworkWorkerPool; + WorkerThreadPool& WorkerPool; + bool ForceDownload = false; + bool IgnoreMissingAttachments = false; + bool CleanOplog = false; + EPartialBlockRequestMode PartialBlockRequestMode = EPartialBlockRequestMode::All; + bool PopulateCache = false; + double StoreLatencySec = -1.0; + uint64_t StoreMaxRangeCountPerRequest = 1; + double CacheLatencySec = -1.0; + uint64_t CacheMaxRangeCountPerRequest = 1; + JobContext* OptionalJobContext = nullptr; +}; + +RemoteProjectStore::Result LoadOplog(LoadOplogContext&& Context); std::vector<IoHash> GetBlockHashesFromOplog(CbObjectView ContainerObject); std::vector<ThinChunkBlockDescription> GetBlocksFromOplog(CbObjectView ContainerObject, std::span<const IoHash> IncludeBlockHashes); diff --git 
a/src/zenremotestore/jupiter/jupiterhost.cpp b/src/zenremotestore/jupiter/jupiterhost.cpp index 7706f00c2..314aafc78 100644 --- a/src/zenremotestore/jupiter/jupiterhost.cpp +++ b/src/zenremotestore/jupiter/jupiterhost.cpp @@ -59,7 +59,22 @@ TestJupiterEndpoint(std::string_view BaseUrl, const bool AssumeHttp2, const bool HttpClient::Response TestResponse = TestHttpClient.Get("/health/live"); if (TestResponse.IsSuccess()) { - return {.Success = true}; + // TODO: dan.engelbrecht 20260305 - replace this naive nginx detection with proper capabilites end point once it exists in Jupiter + uint64_t MaxRangeCountPerRequest = 1; + if (auto It = TestResponse.Header.Entries.find("Server"); It != TestResponse.Header.Entries.end()) + { + if (StrCaseCompare(It->second.c_str(), "nginx", 5) == 0) + { + MaxRangeCountPerRequest = 128u; // This leaves more than 2k header space for auth token etc + } + } + LatencyTestResult LatencyResult = MeasureLatency(TestHttpClient, "/health/ready"); + + if (!LatencyResult.Success) + { + return {.Success = false, .FailureReason = LatencyResult.FailureReason}; + } + return {.Success = true, .LatencySeconds = LatencyResult.LatencySeconds, .MaxRangeCountPerRequest = MaxRangeCountPerRequest}; } return {.Success = false, .FailureReason = TestResponse.ErrorMessage("")}; } diff --git a/src/zenremotestore/jupiter/jupitersession.cpp b/src/zenremotestore/jupiter/jupitersession.cpp index 1bc6564ce..52f9eb678 100644 --- a/src/zenremotestore/jupiter/jupitersession.cpp +++ b/src/zenremotestore/jupiter/jupitersession.cpp @@ -852,6 +852,71 @@ JupiterSession::GetBuildBlob(std::string_view Namespace, return detail::ConvertResponse(Response, "JupiterSession::GetBuildBlob"sv); } +BuildBlobRangesResult +JupiterSession::GetBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const IoHash& Hash, + std::filesystem::path TempFolderPath, + std::span<const std::pair<uint64_t, uint64_t>> Ranges) +{ + HttpClient::KeyValueMap Headers; + if 
(!Ranges.empty()) + { + ExtendableStringBuilder<512> SB; + for (const std::pair<uint64_t, uint64_t>& R : Ranges) + { + if (SB.Size() > 0) + { + SB << ", "; + } + SB << R.first << "-" << R.first + R.second - 1; + } + Headers.Entries.insert({"Range", fmt::format("bytes={}", SB.ToView())}); + } + std::string Url = fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}?supportsRedirect={}", + Namespace, + BucketId, + BuildId, + Hash.ToHexString(), + m_AllowRedirect ? "true"sv : "false"sv); + + HttpClient::Response Response = m_HttpClient.Download(Url, TempFolderPath, Headers); + if (Response.StatusCode == HttpResponseCode::RangeNotSatisfiable && Ranges.size() > 1) + { + // Requests to Jupiter that is not served via nginx (content not stored locally in the file system) can not serve multi-range + // requests (asp.net limitation) This rejection is not implemented as of 2026-03-02, it is in the backlog (@joakim.lindqvist) + // If we encounter this error we fall back to a single range which covers all the requested ranges + uint64_t RangeStart = Ranges.front().first; + uint64_t RangeEnd = Ranges.back().first + Ranges.back().second - 1; + Headers.Entries.insert_or_assign("Range", fmt::format("bytes={}-{}", RangeStart, RangeEnd)); + Response = m_HttpClient.Download(Url, TempFolderPath, Headers); + } + if (Response.IsSuccess()) + { + // If we get a redirect to S3 or a non-Jupiter endpoint the content type will not be correct, validate it and set it + if (m_AllowRedirect && (Response.ResponsePayload.GetContentType() == HttpContentType::kBinary)) + { + IoHash ValidateRawHash; + uint64_t ValidateRawSize = 0; + if (!Headers.Entries.contains("Range")) + { + ZEN_ASSERT_SLOW(CompressedBuffer::ValidateCompressedHeader(Response.ResponsePayload, + ValidateRawHash, + ValidateRawSize, + /*OutOptionalTotalCompressedSize*/ nullptr)); + ZEN_ASSERT_SLOW(ValidateRawHash == Hash); + ZEN_ASSERT_SLOW(ValidateRawSize > 0); + ZEN_UNUSED(ValidateRawHash, ValidateRawSize); + 
Response.ResponsePayload.SetContentType(ZenContentType::kCompressedBinary); + } + } + } + BuildBlobRangesResult Result = {detail::ConvertResponse(Response, "JupiterSession::GetBuildBlob"sv)}; + Result.Ranges = Response.GetRanges(Ranges); + return Result; +} + JupiterResult JupiterSession::PutBlockMetadata(std::string_view Namespace, std::string_view BucketId, diff --git a/src/zenremotestore/operationlogoutput.cpp b/src/zenremotestore/operationlogoutput.cpp index 0837ed716..5ed844c9d 100644 --- a/src/zenremotestore/operationlogoutput.cpp +++ b/src/zenremotestore/operationlogoutput.cpp @@ -3,6 +3,7 @@ #include <zenremotestore/operationlogoutput.h> #include <zencore/logging.h> +#include <zencore/logging/logger.h> ZEN_THIRD_PARTY_INCLUDES_START #include <gsl/gsl-lite.hpp> @@ -30,13 +31,11 @@ class StandardLogOutput : public OperationLogOutput { public: StandardLogOutput(LoggerRef& Log) : m_Log(Log) {} - virtual void EmitLogMessage(int LogLevel, std::string_view Format, fmt::format_args Args) override + virtual void EmitLogMessage(const logging::LogPoint& Point, fmt::format_args Args) override { - if (m_Log.ShouldLog(LogLevel)) + if (m_Log.ShouldLog(Point.Level)) { - fmt::basic_memory_buffer<char, 250> MessageBuffer; - fmt::vformat_to(fmt::appender(MessageBuffer), Format, Args); - ZEN_LOG(m_Log, LogLevel, "{}", std::string_view(MessageBuffer.data(), MessageBuffer.size())); + m_Log->Log(Point, Args); } } @@ -47,7 +46,7 @@ public: } virtual void SetLogOperationProgress(uint32_t StepIndex, uint32_t StepCount) override { - const size_t PercentDone = StepCount > 0u ? gsl::narrow<uint8_t>((100 * StepIndex) / StepCount) : 0u; + [[maybe_unused]] const size_t PercentDone = StepCount > 0u ? 
gsl::narrow<uint8_t>((100 * StepIndex) / StepCount) : 0u; ZEN_OPERATION_LOG_INFO(*this, "{}: {}%", m_LogOperationName, PercentDone); } virtual uint32_t GetProgressUpdateDelayMS() override { return 2000; } @@ -59,13 +58,14 @@ public: private: LoggerRef m_Log; std::string m_LogOperationName; + LoggerRef Log() { return m_Log; } }; void StandardLogOutputProgressBar::UpdateState(const State& NewState, bool DoLinebreak) { ZEN_UNUSED(DoLinebreak); - const size_t PercentDone = + [[maybe_unused]] const size_t PercentDone = NewState.TotalCount > 0u ? gsl::narrow<uint8_t>((100 * (NewState.TotalCount - NewState.RemainingCount)) / NewState.TotalCount) : 0u; std::string Task = NewState.Task; switch (NewState.Status) @@ -95,7 +95,7 @@ StandardLogOutputProgressBar::Finish() } OperationLogOutput* -CreateStandardLogOutput(LoggerRef& Log) +CreateStandardLogOutput(LoggerRef Log) { return new StandardLogOutput(Log); } diff --git a/src/zenremotestore/partialblockrequestmode.cpp b/src/zenremotestore/partialblockrequestmode.cpp new file mode 100644 index 000000000..b3edf515b --- /dev/null +++ b/src/zenremotestore/partialblockrequestmode.cpp @@ -0,0 +1,27 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include <zenremotestore/partialblockrequestmode.h> + +#include <zencore/string.h> + +namespace zen { + +EPartialBlockRequestMode +PartialBlockRequestModeFromString(const std::string_view ModeString) +{ + switch (HashStringAsLowerDjb2(ModeString)) + { + case HashStringDjb2("false"): + return EPartialBlockRequestMode::Off; + case HashStringDjb2("zencacheonly"): + return EPartialBlockRequestMode::ZenCacheOnly; + case HashStringDjb2("mixed"): + return EPartialBlockRequestMode::Mixed; + case HashStringDjb2("true"): + return EPartialBlockRequestMode::All; + default: + return EPartialBlockRequestMode::Invalid; + } +} + +} // namespace zen diff --git a/src/zenremotestore/projectstore/buildsremoteprojectstore.cpp b/src/zenremotestore/projectstore/buildsremoteprojectstore.cpp index a8e883dde..2282a31dd 100644 --- a/src/zenremotestore/projectstore/buildsremoteprojectstore.cpp +++ b/src/zenremotestore/projectstore/buildsremoteprojectstore.cpp @@ -7,8 +7,6 @@ #include <zencore/fmtutils.h> #include <zencore/scopeguard.h> -#include <zenhttp/httpclientauth.h> -#include <zenremotestore/builds/buildstoragecache.h> #include <zenremotestore/builds/buildstorageutil.h> #include <zenremotestore/builds/jupiterbuildstorage.h> #include <zenremotestore/operationlogoutput.h> @@ -26,18 +24,14 @@ class BuildsRemoteStore : public RemoteProjectStore public: BuildsRemoteStore(LoggerRef InLog, const HttpClientSettings& ClientSettings, - HttpClientSettings* OptionalCacheClientSettings, std::string_view HostUrl, - std::string_view CacheUrl, const std::filesystem::path& TempFilePath, - WorkerThreadPool& CacheBackgroundWorkerPool, std::string_view Namespace, std::string_view Bucket, const Oid& BuildId, const IoBuffer& MetaData, bool ForceDisableBlocks, - bool ForceDisableTempBlocks, - bool PopulateCache) + bool ForceDisableTempBlocks) : m_Log(InLog) , m_BuildStorageHttp(HostUrl, ClientSettings) , m_BuildStorage(CreateJupiterBuildStorage(Log(), @@ -53,20 +47,8 @@ public: , m_MetaData(MetaData) , 
m_EnableBlocks(!ForceDisableBlocks) , m_UseTempBlocks(!ForceDisableTempBlocks) - , m_PopulateCache(PopulateCache) { m_MetaData.MakeOwned(); - if (OptionalCacheClientSettings) - { - ZEN_ASSERT(!CacheUrl.empty()); - m_BuildCacheStorageHttp = std::make_unique<HttpClient>(CacheUrl, *OptionalCacheClientSettings); - m_BuildCacheStorage = CreateZenBuildStorageCache(*m_BuildCacheStorageHttp, - m_StorageCacheStats, - Namespace, - Bucket, - TempFilePath, - CacheBackgroundWorkerPool); - } } virtual RemoteStoreInfo GetInfo() const override @@ -75,9 +57,8 @@ public: .UseTempBlockFiles = m_UseTempBlocks, .AllowChunking = true, .ContainerName = fmt::format("{}/{}/{}", m_Namespace, m_Bucket, m_BuildId), - .Description = fmt::format("[cloud] {}{}. SessionId: {}. {}/{}/{}"sv, + .Description = fmt::format("[cloud] {}. SessionId: {}. {}/{}/{}"sv, m_BuildStorageHttp.GetBaseUri(), - m_BuildCacheStorage ? fmt::format(" (Cache: {})", m_BuildCacheStorageHttp->GetBaseUri()) : ""sv, m_BuildStorageHttp.GetSessionId(), m_Namespace, m_Bucket, @@ -86,15 +67,13 @@ public: virtual Stats GetStats() const override { - return { - .m_SentBytes = m_BuildStorageStats.TotalBytesWritten.load() + m_StorageCacheStats.TotalBytesWritten.load(), - .m_ReceivedBytes = m_BuildStorageStats.TotalBytesRead.load() + m_StorageCacheStats.TotalBytesRead.load(), - .m_RequestTimeNS = m_BuildStorageStats.TotalRequestTimeUs.load() * 1000 + m_StorageCacheStats.TotalRequestTimeUs.load() * 1000, - .m_RequestCount = m_BuildStorageStats.TotalRequestCount.load() + m_StorageCacheStats.TotalRequestCount.load(), - .m_PeakSentBytes = Max(m_BuildStorageStats.PeakSentBytes.load(), m_StorageCacheStats.PeakSentBytes.load()), - .m_PeakReceivedBytes = Max(m_BuildStorageStats.PeakReceivedBytes.load(), m_StorageCacheStats.PeakReceivedBytes.load()), - .m_PeakBytesPerSec = Max(m_BuildStorageStats.PeakBytesPerSec.load(), m_StorageCacheStats.PeakBytesPerSec.load()), - }; + return {.m_SentBytes = m_BuildStorageStats.TotalBytesWritten.load(), + 
.m_ReceivedBytes = m_BuildStorageStats.TotalBytesRead.load(), + .m_RequestTimeNS = m_BuildStorageStats.TotalRequestTimeUs.load() * 1000, + .m_RequestCount = m_BuildStorageStats.TotalRequestCount.load(), + .m_PeakSentBytes = m_BuildStorageStats.PeakSentBytes.load(), + .m_PeakReceivedBytes = m_BuildStorageStats.PeakReceivedBytes.load(), + .m_PeakBytesPerSec = m_BuildStorageStats.PeakBytesPerSec.load()}; } virtual bool GetExtendedStats(ExtendedStats& OutStats) const override @@ -109,11 +88,6 @@ public: } Result = true; } - if (m_BuildCacheStorage) - { - OutStats.m_ReceivedBytesPerSource.insert_or_assign("Cache", m_StorageCacheStats.TotalBytesRead); - Result = true; - } return Result; } @@ -441,7 +415,7 @@ public: catch (const HttpClientError& Ex) { Result.ErrorCode = MakeErrorCode(Ex); - Result.Reason = fmt::format("Failed listing know blocks for {}/{}/{}/{}. Reason: '{}'", + Result.Reason = fmt::format("Failed listing known blocks for {}/{}/{}/{}. Reason: '{}'", m_BuildStorageHttp.GetBaseUri(), m_Namespace, m_Bucket, @@ -451,7 +425,7 @@ public: catch (const std::exception& Ex) { Result.ErrorCode = gsl::narrow<int32_t>(HttpResponseCode::InternalServerError); - Result.Reason = fmt::format("Failed listing know blocks for {}/{}/{}/{}. Reason: '{}'", + Result.Reason = fmt::format("Failed listing known blocks for {}/{}/{}/{}. 
Reason: '{}'", m_BuildStorageHttp.GetBaseUri(), m_Namespace, m_Bucket, @@ -462,6 +436,53 @@ public: return Result; } + virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes, + BuildStorageCache* OptionalCache, + const Oid& CacheBuildId) override + { + std::unique_ptr<OperationLogOutput> Output(CreateStandardLogOutput(Log())); + + ZEN_ASSERT(m_OplogBuildPartId != Oid::Zero); + ZEN_ASSERT(OptionalCache == nullptr || CacheBuildId == m_BuildId); + + GetBlockDescriptionsResult Result; + Stopwatch Timer; + auto _ = MakeGuard([&Timer, &Result]() { Result.ElapsedSeconds = Timer.GetElapsedTimeUs() / 1000000.0; }); + + try + { + Result.Blocks = zen::GetBlockDescriptions(*Output, + *m_BuildStorage, + OptionalCache, + m_BuildId, + BlockHashes, + /*AttemptFallback*/ false, + /*IsQuiet*/ false, + /*IsVerbose)*/ false); + } + catch (const HttpClientError& Ex) + { + Result.ErrorCode = MakeErrorCode(Ex); + Result.Reason = fmt::format("Failed listing known blocks for {}/{}/{}/{}. Reason: '{}'", + m_BuildStorageHttp.GetBaseUri(), + m_Namespace, + m_Bucket, + m_BuildId, + Ex.what()); + } + catch (const std::exception& Ex) + { + Result.ErrorCode = gsl::narrow<int32_t>(HttpResponseCode::InternalServerError); + Result.Reason = fmt::format("Failed listing known blocks for {}/{}/{}/{}. 
Reason: '{}'", + m_BuildStorageHttp.GetBaseUri(), + m_Namespace, + m_Bucket, + m_BuildId, + Ex.what()); + } + return Result; + } + virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash) override { ZEN_ASSERT(m_OplogBuildPartId != Oid::Zero); @@ -472,44 +493,73 @@ public: try { - if (m_BuildCacheStorage) - { - IoBuffer CachedBlob = m_BuildCacheStorage->GetBuildBlob(m_BuildId, RawHash); - if (CachedBlob) - { - Result.Bytes = std::move(CachedBlob); - } - } - if (!Result.Bytes) + Result.Bytes = m_BuildStorage->GetBuildBlob(m_BuildId, RawHash); + } + catch (const HttpClientError& Ex) + { + Result.ErrorCode = MakeErrorCode(Ex); + Result.Reason = fmt::format("Failed getting blob {}/{}/{}/{}/{}. Reason: '{}'", + m_BuildStorageHttp.GetBaseUri(), + m_Namespace, + m_Bucket, + m_BuildId, + RawHash, + Ex.what()); + } + catch (const std::exception& Ex) + { + Result.ErrorCode = gsl::narrow<int32_t>(HttpResponseCode::InternalServerError); + Result.Reason = fmt::format("Failed getting blob {}/{}/{}/{}/{}. 
Reason: '{}'", + m_BuildStorageHttp.GetBaseUri(), + m_Namespace, + m_Bucket, + m_BuildId, + RawHash, + Ex.what()); + } + + return Result; + } + + virtual LoadAttachmentRangesResult LoadAttachmentRanges(const IoHash& RawHash, + std::span<const std::pair<uint64_t, uint64_t>> Ranges) override + { + ZEN_ASSERT(!Ranges.empty()); + LoadAttachmentRangesResult Result; + Stopwatch Timer; + auto _ = MakeGuard([&Timer, &Result]() { Result.ElapsedSeconds = Timer.GetElapsedTimeUs() / 1000000.0; }); + + try + { + BuildStorageBase::BuildBlobRanges BlobRanges = m_BuildStorage->GetBuildBlobRanges(m_BuildId, RawHash, Ranges); + if (BlobRanges.PayloadBuffer) { - Result.Bytes = m_BuildStorage->GetBuildBlob(m_BuildId, RawHash); - if (m_BuildCacheStorage && Result.Bytes && m_PopulateCache) - { - m_BuildCacheStorage->PutBuildBlob(m_BuildId, - RawHash, - Result.Bytes.GetContentType(), - CompositeBuffer(SharedBuffer(Result.Bytes))); - } + Result.Bytes = std::move(BlobRanges.PayloadBuffer); + Result.Ranges = std::move(BlobRanges.Ranges); } } catch (const HttpClientError& Ex) { Result.ErrorCode = MakeErrorCode(Ex); - Result.Reason = fmt::format("Failed listing know blocks for {}/{}/{}/{}. Reason: '{}'", + Result.Reason = fmt::format("Failed getting {} ranges for blob {}/{}/{}/{}/{}. Reason: '{}'", + Ranges.size(), m_BuildStorageHttp.GetBaseUri(), m_Namespace, m_Bucket, m_BuildId, + RawHash, Ex.what()); } catch (const std::exception& Ex) { Result.ErrorCode = gsl::narrow<int32_t>(HttpResponseCode::InternalServerError); - Result.Reason = fmt::format("Failed listing know blocks for {}/{}/{}/{}. Reason: '{}'", + Result.Reason = fmt::format("Failed getting {} ranges for blob {}/{}/{}/{}/{}. 
Reason: '{}'", + Ranges.size(), m_BuildStorageHttp.GetBaseUri(), m_Namespace, m_Bucket, m_BuildId, + RawHash, Ex.what()); } @@ -524,38 +574,6 @@ public: std::vector<IoHash> AttachmentsLeftToFind = RawHashes; - if (m_BuildCacheStorage) - { - std::vector<BuildStorageCache::BlobExistsResult> ExistCheck = m_BuildCacheStorage->BlobsExists(m_BuildId, RawHashes); - if (ExistCheck.size() == RawHashes.size()) - { - AttachmentsLeftToFind.clear(); - for (size_t BlobIndex = 0; BlobIndex < RawHashes.size(); BlobIndex++) - { - const IoHash& Hash = RawHashes[BlobIndex]; - const BuildStorageCache::BlobExistsResult& BlobExists = ExistCheck[BlobIndex]; - if (BlobExists.HasBody) - { - IoBuffer CachedPayload = m_BuildCacheStorage->GetBuildBlob(m_BuildId, Hash); - if (CachedPayload) - { - Result.Chunks.emplace_back( - std::pair<IoHash, CompressedBuffer>{Hash, - CompressedBuffer::FromCompressedNoValidate(std::move(CachedPayload))}); - } - else - { - AttachmentsLeftToFind.push_back(Hash); - } - } - else - { - AttachmentsLeftToFind.push_back(Hash); - } - } - } - } - for (const IoHash& Hash : AttachmentsLeftToFind) { LoadAttachmentResult ChunkResult = LoadAttachment(Hash); @@ -564,27 +582,12 @@ public: return LoadAttachmentsResult{ChunkResult}; } ZEN_DEBUG("Loaded attachment in {}", NiceTimeSpanMs(static_cast<uint64_t>(ChunkResult.ElapsedSeconds * 1000))); - if (m_BuildCacheStorage && ChunkResult.Bytes && m_PopulateCache) - { - m_BuildCacheStorage->PutBuildBlob(m_BuildId, - Hash, - ChunkResult.Bytes.GetContentType(), - CompositeBuffer(SharedBuffer(ChunkResult.Bytes))); - } Result.Chunks.emplace_back( std::pair<IoHash, CompressedBuffer>{Hash, CompressedBuffer::FromCompressedNoValidate(std::move(ChunkResult.Bytes))}); } return Result; } - virtual void Flush() override - { - if (m_BuildCacheStorage) - { - m_BuildCacheStorage->Flush(100, [](intptr_t) { return false; }); - } - } - private: static int MakeErrorCode(const HttpClientError& Ex) { @@ -601,10 +604,6 @@ private: HttpClient 
m_BuildStorageHttp; std::unique_ptr<BuildStorageBase> m_BuildStorage; - BuildStorageCache::Statistics m_StorageCacheStats; - std::unique_ptr<HttpClient> m_BuildCacheStorageHttp; - std::unique_ptr<BuildStorageCache> m_BuildCacheStorage; - const std::string m_Namespace; const std::string m_Bucket; const Oid m_BuildId; @@ -613,120 +612,35 @@ private: const bool m_EnableBlocks = true; const bool m_UseTempBlocks = true; const bool m_AllowRedirect = false; - const bool m_PopulateCache = true; }; std::shared_ptr<RemoteProjectStore> -CreateJupiterBuildsRemoteStore(LoggerRef InLog, - const BuildsRemoteStoreOptions& Options, - const std::filesystem::path& TempFilePath, - bool Quiet, - bool Unattended, - bool Hidden, - WorkerThreadPool& CacheBackgroundWorkerPool) +CreateJupiterBuildsRemoteStore(LoggerRef InLog, + const BuildStorageResolveResult& ResolveResult, + std::function<HttpClientAccessToken()>&& TokenProvider, + const BuildsRemoteStoreOptions& Options, + const std::filesystem::path& TempFilePath) { - std::string Host = Options.Host; - if (!Host.empty() && Host.find("://"sv) == std::string::npos) - { - // Assume https URL - Host = fmt::format("https://{}"sv, Host); - } - std::string OverrideUrl = Options.OverrideHost; - if (!OverrideUrl.empty() && OverrideUrl.find("://"sv) == std::string::npos) - { - // Assume https URL - OverrideUrl = fmt::format("https://{}"sv, OverrideUrl); - } - std::string ZenHost = Options.ZenHost; - if (!ZenHost.empty() && ZenHost.find("://"sv) == std::string::npos) - { - // Assume https URL - ZenHost = fmt::format("https://{}"sv, ZenHost); - } - - // 1) openid-provider if given (assumes oidctoken.exe -Zen true has been run with matching Options.OpenIdProvider - // 2) Access token as parameter in request - // 3) Environment variable (different win vs linux/mac) - // 4) Default openid-provider (assumes oidctoken.exe -Zen true has been run with matching Options.OpenIdProvider - - std::function<HttpClientAccessToken()> TokenProvider; - if 
(!Options.OpenIdProvider.empty()) - { - TokenProvider = httpclientauth::CreateFromOpenIdProvider(Options.AuthManager, Options.OpenIdProvider); - } - else if (!Options.AccessToken.empty()) - { - TokenProvider = httpclientauth::CreateFromStaticToken(Options.AccessToken); - } - else if (!Options.OidcExePath.empty()) - { - if (auto TokenProviderMaybe = httpclientauth::CreateFromOidcTokenExecutable(Options.OidcExePath, - Host.empty() ? OverrideUrl : Host, - Quiet, - Unattended, - Hidden); - TokenProviderMaybe) - { - TokenProvider = TokenProviderMaybe.value(); - } - } - - if (!TokenProvider) - { - TokenProvider = httpclientauth::CreateFromDefaultOpenIdProvider(Options.AuthManager); - } - - BuildStorageResolveResult ResolveRes; - { - HttpClientSettings ClientSettings{.LogCategory = "httpbuildsclient", - .AccessTokenProvider = TokenProvider, - .AssumeHttp2 = Options.AssumeHttp2, - .AllowResume = true, - .RetryCount = 2}; - - std::unique_ptr<OperationLogOutput> Output(CreateStandardLogOutput(InLog)); - - ResolveRes = - ResolveBuildStorage(*Output, ClientSettings, Host, OverrideUrl, ZenHost, ZenCacheResolveMode::Discovery, /*Verbose*/ false); - } - HttpClientSettings ClientSettings{.LogCategory = "httpbuildsclient", .ConnectTimeout = std::chrono::milliseconds(3000), .Timeout = std::chrono::milliseconds(1800000), .AccessTokenProvider = std::move(TokenProvider), - .AssumeHttp2 = ResolveRes.HostAssumeHttp2, + .AssumeHttp2 = ResolveResult.Cloud.AssumeHttp2, .AllowResume = true, .RetryCount = 4, .MaximumInMemoryDownloadSize = Options.MaximumInMemoryDownloadSize}; - std::unique_ptr<HttpClientSettings> CacheClientSettings; - - if (!ResolveRes.CacheUrl.empty()) - { - CacheClientSettings = - std::make_unique<HttpClientSettings>(HttpClientSettings{.LogCategory = "httpcacheclient", - .ConnectTimeout = std::chrono::milliseconds{3000}, - .Timeout = std::chrono::milliseconds{30000}, - .AssumeHttp2 = ResolveRes.CacheAssumeHttp2, - .AllowResume = true, - .RetryCount = 0, - 
.MaximumInMemoryDownloadSize = Options.MaximumInMemoryDownloadSize}); - } - std::shared_ptr<RemoteProjectStore> RemoteStore = std::make_shared<BuildsRemoteStore>(InLog, ClientSettings, - CacheClientSettings.get(), - ResolveRes.HostUrl, - ResolveRes.CacheUrl, + ResolveResult.Cloud.Address, TempFilePath, - CacheBackgroundWorkerPool, Options.Namespace, Options.Bucket, Options.BuildId, Options.MetaData, Options.ForceDisableBlocks, - Options.ForceDisableTempBlocks, - Options.PopulateCache); + Options.ForceDisableTempBlocks); + return RemoteStore; } diff --git a/src/zenremotestore/projectstore/fileremoteprojectstore.cpp b/src/zenremotestore/projectstore/fileremoteprojectstore.cpp index 3a67d3842..bb21de12c 100644 --- a/src/zenremotestore/projectstore/fileremoteprojectstore.cpp +++ b/src/zenremotestore/projectstore/fileremoteprojectstore.cpp @@ -7,8 +7,12 @@ #include <zencore/filesystem.h> #include <zencore/fmtutils.h> #include <zencore/logging.h> +#include <zencore/scopeguard.h> #include <zencore/timer.h> #include <zenhttp/httpcommon.h> +#include <zenremotestore/builds/buildstoragecache.h> + +#include <numeric> namespace zen { @@ -74,9 +78,11 @@ public: virtual SaveResult SaveContainer(const IoBuffer& Payload) override { - Stopwatch Timer; SaveResult Result; + Stopwatch Timer; + auto _ = MakeGuard([&Result, &Timer]() { Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; }); + { CbObject ContainerObject = LoadCompactBinaryObject(Payload); @@ -87,6 +93,10 @@ public: { Result.Needs.insert(AttachmentHash); } + else if (std::filesystem::path AttachmentMetaPath = GetAttachmentMetaPath(AttachmentHash); IsFile(AttachmentMetaPath)) + { + BasicFile TouchIt(AttachmentMetaPath, BasicFile::Mode::kWrite); + } }); } @@ -112,14 +122,18 @@ public: Result.Reason = fmt::format("Failed saving oplog container to '{}'. 
Reason: {}", ContainerPath, Ex.what()); } AddStats(Payload.GetSize(), 0, Timer.GetElapsedTimeUs() * 1000); - Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; return Result; } - virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, ChunkBlockDescription&&) override + virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, + const IoHash& RawHash, + ChunkBlockDescription&& BlockDescription) override { - Stopwatch Timer; - SaveAttachmentResult Result; + SaveAttachmentResult Result; + + Stopwatch Timer; + auto _ = MakeGuard([&Result, &Timer]() { Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; }); + std::filesystem::path ChunkPath = GetAttachmentPath(RawHash); if (!IsFile(ChunkPath)) { @@ -142,14 +156,33 @@ public: Result.Reason = fmt::format("Failed saving oplog attachment to '{}'. Reason: {}", ChunkPath, Ex.what()); } } + if (!Result.ErrorCode && BlockDescription.BlockHash != IoHash::Zero) + { + try + { + std::filesystem::path MetaPath = GetAttachmentMetaPath(RawHash); + CbObject MetaData = BuildChunkBlockDescription(BlockDescription, {}); + SharedBuffer MetaBuffer = MetaData.GetBuffer(); + BasicFile MetaFile; + MetaFile.Open(MetaPath, BasicFile::Mode::kTruncate); + MetaFile.Write(MetaBuffer.GetView(), 0); + } + catch (const std::exception& Ex) + { + Result.ErrorCode = gsl::narrow<int32_t>(HttpResponseCode::InternalServerError); + Result.Reason = fmt::format("Failed saving block description to '{}'. 
Reason: {}", RawHash, Ex.what()); + } + } AddStats(Payload.GetSize(), 0, Timer.GetElapsedTimeUs() * 1000); - Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; return Result; } virtual SaveAttachmentsResult SaveAttachments(const std::vector<SharedBuffer>& Chunks) override { + SaveAttachmentsResult Result; + Stopwatch Timer; + auto _ = MakeGuard([&Result, &Timer]() { Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; }); for (const SharedBuffer& Chunk : Chunks) { @@ -157,12 +190,10 @@ public: SaveAttachmentResult ChunkResult = SaveAttachment(Compressed.GetCompressed(), Compressed.DecodeRawHash(), {}); if (ChunkResult.ErrorCode) { - ChunkResult.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; - return SaveAttachmentsResult{ChunkResult}; + Result = SaveAttachmentsResult{ChunkResult}; + break; } } - SaveAttachmentsResult Result; - Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; return Result; } @@ -172,21 +203,60 @@ public: virtual GetKnownBlocksResult GetKnownBlocks() override { + Stopwatch Timer; if (m_OptionalBaseName.empty()) { - return GetKnownBlocksResult{{.ErrorCode = static_cast<int>(HttpResponseCode::NoContent)}}; + size_t MaxBlockCount = 10000; + + GetKnownBlocksResult Result; + + DirectoryContent Content; + GetDirectoryContent( + m_OutputPath, + DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::Recursive | DirectoryContentFlags::IncludeModificationTick, + Content); + std::vector<size_t> RecentOrder(Content.Files.size()); + std::iota(RecentOrder.begin(), RecentOrder.end(), 0u); + std::sort(RecentOrder.begin(), RecentOrder.end(), [&Content](size_t Lhs, size_t Rhs) { + return Content.FileModificationTicks[Lhs] > Content.FileModificationTicks[Rhs]; + }); + + for (size_t FileIndex : RecentOrder) + { + std::filesystem::path MetaPath = Content.Files[FileIndex]; + if (MetaPath.extension() == MetaExtension) + { + IoBuffer MetaFile = ReadFile(MetaPath).Flatten(); + CbValidateError Err; + CbObject ValidatedObject = 
ValidateAndReadCompactBinaryObject(std::move(MetaFile), Err); + if (Err == CbValidateError::None) + { + ChunkBlockDescription Description = ParseChunkBlockDescription(ValidatedObject); + if (Description.BlockHash != IoHash::Zero) + { + Result.Blocks.emplace_back(std::move(Description)); + if (Result.Blocks.size() == MaxBlockCount) + { + break; + } + } + } + } + } + + Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; + return Result; } LoadContainerResult LoadResult = LoadContainer(m_OptionalBaseName); if (LoadResult.ErrorCode) { return GetKnownBlocksResult{LoadResult}; } - Stopwatch Timer; std::vector<IoHash> BlockHashes = GetBlockHashesFromOplog(LoadResult.ContainerObject); if (BlockHashes.empty()) { return GetKnownBlocksResult{{.ErrorCode = static_cast<int>(HttpResponseCode::NoContent), - .ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeUs() * 1000}}; + .ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeMs() / 1000.0}}; } std::vector<IoHash> ExistingBlockHashes; for (const IoHash& RawHash : BlockHashes) @@ -200,15 +270,15 @@ public: if (ExistingBlockHashes.empty()) { return GetKnownBlocksResult{{.ErrorCode = static_cast<int>(HttpResponseCode::NoContent), - .ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeUs() * 1000}}; + .ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeMs() / 1000.0}}; } std::vector<ThinChunkBlockDescription> ThinKnownBlocks = GetBlocksFromOplog(LoadResult.ContainerObject, ExistingBlockHashes); - const size_t KnowBlockCount = ThinKnownBlocks.size(); + const size_t KnownBlockCount = ThinKnownBlocks.size(); - GetKnownBlocksResult Result{{.ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeUs() * 1000}}; - Result.Blocks.resize(KnowBlockCount); - for (size_t BlockIndex = 0; BlockIndex < KnowBlockCount; BlockIndex++) + GetKnownBlocksResult Result{{.ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeMs() / 1000.0}}; + 
Result.Blocks.resize(KnownBlockCount); + for (size_t BlockIndex = 0; BlockIndex < KnownBlockCount; BlockIndex++) { Result.Blocks[BlockIndex].BlockHash = ThinKnownBlocks[BlockIndex].BlockHash; Result.Blocks[BlockIndex].ChunkRawHashes = std::move(ThinKnownBlocks[BlockIndex].ChunkRawHashes); @@ -217,16 +287,88 @@ public: return Result; } + virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes, + BuildStorageCache* OptionalCache, + const Oid& CacheBuildId) override + { + GetBlockDescriptionsResult Result; + + Stopwatch Timer; + auto _ = MakeGuard([&Result, &Timer]() { Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; }); + + Result.Blocks.reserve(BlockHashes.size()); + + uint64_t ByteCount = 0; + + std::vector<ChunkBlockDescription> UnorderedList; + { + if (OptionalCache) + { + std::vector<CbObject> CacheBlockMetadatas = OptionalCache->GetBlobMetadatas(CacheBuildId, BlockHashes); + for (const CbObject& BlockObject : CacheBlockMetadatas) + { + ByteCount += BlockObject.GetSize(); + } + UnorderedList = ParseBlockMetadatas(CacheBlockMetadatas); + } + + tsl::robin_map<IoHash, size_t, IoHash::Hasher> BlockDescriptionLookup; + BlockDescriptionLookup.reserve(BlockHashes.size()); + for (size_t DescriptionIndex = 0; DescriptionIndex < UnorderedList.size(); DescriptionIndex++) + { + const ChunkBlockDescription& Description = UnorderedList[DescriptionIndex]; + BlockDescriptionLookup.insert_or_assign(Description.BlockHash, DescriptionIndex); + } + + if (UnorderedList.size() < BlockHashes.size()) + { + for (const IoHash& RawHash : BlockHashes) + { + if (!BlockDescriptionLookup.contains(RawHash)) + { + std::filesystem::path MetaPath = GetAttachmentMetaPath(RawHash); + IoBuffer MetaFile = ReadFile(MetaPath).Flatten(); + ByteCount += MetaFile.GetSize(); + CbValidateError Err; + CbObject ValidatedObject = ValidateAndReadCompactBinaryObject(std::move(MetaFile), Err); + if (Err == CbValidateError::None) + { + ChunkBlockDescription Description 
= ParseChunkBlockDescription(ValidatedObject); + if (Description.BlockHash != IoHash::Zero) + { + BlockDescriptionLookup.insert_or_assign(Description.BlockHash, UnorderedList.size()); + UnorderedList.emplace_back(std::move(Description)); + } + } + } + } + } + + Result.Blocks.reserve(UnorderedList.size()); + for (const IoHash& RawHash : BlockHashes) + { + if (auto It = BlockDescriptionLookup.find(RawHash); It != BlockDescriptionLookup.end()) + { + Result.Blocks.emplace_back(std::move(UnorderedList[It->second])); + } + } + } + AddStats(0, ByteCount, Timer.GetElapsedTimeUs() * 1000); + return Result; + } + virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash) override { - Stopwatch Timer; - LoadAttachmentResult Result; + LoadAttachmentResult Result; + + Stopwatch Timer; + auto _ = MakeGuard([&Result, &Timer]() { Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; }); + std::filesystem::path ChunkPath = GetAttachmentPath(RawHash); if (!IsFile(ChunkPath)) { Result.ErrorCode = gsl::narrow<int>(HttpResponseCode::NotFound); Result.Reason = fmt::format("Failed loading oplog attachment from '{}'. 
Reason: 'The file does not exist'", ChunkPath.string()); - Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; return Result; } { @@ -235,7 +377,41 @@ public: Result.Bytes = ChunkFile.ReadAll(); } AddStats(0, Result.Bytes.GetSize(), Timer.GetElapsedTimeUs() * 1000); - Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; + return Result; + } + + virtual LoadAttachmentRangesResult LoadAttachmentRanges(const IoHash& RawHash, + std::span<const std::pair<uint64_t, uint64_t>> Ranges) override + { + ZEN_ASSERT(!Ranges.empty()); + LoadAttachmentRangesResult Result; + + Stopwatch Timer; + auto _ = MakeGuard([&Result, &Timer]() { Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; }); + + std::filesystem::path ChunkPath = GetAttachmentPath(RawHash); + if (!IsFile(ChunkPath)) + { + Result.ErrorCode = gsl::narrow<int>(HttpResponseCode::NotFound); + Result.Reason = fmt::format("Failed loading oplog attachment from '{}'. Reason: 'The file does not exist'", ChunkPath.string()); + return Result; + } + { + uint64_t Start = Ranges.front().first; + uint64_t Length = Ranges.back().first + Ranges.back().second - Ranges.front().first; + Result.Bytes = IoBufferBuilder::MakeFromFile(ChunkPath, Start, Length); + Result.Ranges.reserve(Ranges.size()); + for (const std::pair<uint64_t, uint64_t>& Range : Ranges) + { + Result.Ranges.push_back(std::make_pair(Range.first - Start, Range.second)); + } + } + AddStats(0, + std::accumulate(Result.Ranges.begin(), + Result.Ranges.end(), + uint64_t(0), + [](uint64_t Current, const std::pair<uint64_t, uint64_t>& Value) { return Current + Value.second; }), + Timer.GetElapsedTimeUs() * 1000); return Result; } @@ -258,20 +434,20 @@ public: return Result; } - virtual void Flush() override {} - private: LoadContainerResult LoadContainer(const std::string& Name) { - Stopwatch Timer; - LoadContainerResult Result; + LoadContainerResult Result; + + Stopwatch Timer; + auto _ = MakeGuard([&Result, &Timer]() { Result.ElapsedSeconds = 
Timer.GetElapsedTimeMs() / 1000.0; }); + std::filesystem::path SourcePath = m_OutputPath; SourcePath.append(Name); if (!IsFile(SourcePath)) { Result.ErrorCode = gsl::narrow<int>(HttpResponseCode::NotFound); Result.Reason = fmt::format("Failed loading oplog container from '{}'. Reason: 'The file does not exist'", SourcePath.string()); - Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; return Result; } IoBuffer ContainerPayload; @@ -285,18 +461,16 @@ private: if (Result.ContainerObject = ValidateAndReadCompactBinaryObject(std::move(ContainerPayload), ValidateResult); ValidateResult != CbValidateError::None || !Result.ContainerObject) { - Result.ErrorCode = gsl::narrow<int32_t>(HttpResponseCode::InternalServerError); - Result.Reason = fmt::format("The file {} is not formatted as a compact binary object ('{}')", - SourcePath.string(), - ToString(ValidateResult)); - Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; + Result.ErrorCode = gsl::narrow<int32_t>(HttpResponseCode::InternalServerError); + Result.Reason = fmt::format("The file {} is not formatted as a compact binary object ('{}')", + SourcePath.string(), + ToString(ValidateResult)); return Result; } - Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; return Result; } - std::filesystem::path GetAttachmentPath(const IoHash& RawHash) const + std::filesystem::path GetAttachmentBasePath(const IoHash& RawHash) const { ExtendablePathBuilder<128> ShardedPath; ShardedPath.Append(m_OutputPath.c_str()); @@ -315,6 +489,19 @@ private: return ShardedPath.ToPath(); } + static constexpr std::string_view BlobExtension = ".blob"; + static constexpr std::string_view MetaExtension = ".meta"; + + std::filesystem::path GetAttachmentPath(const IoHash& RawHash) + { + return GetAttachmentBasePath(RawHash).replace_extension(BlobExtension); + } + + std::filesystem::path GetAttachmentMetaPath(const IoHash& RawHash) + { + return GetAttachmentBasePath(RawHash).replace_extension(MetaExtension); + } + void 
AddStats(uint64_t UploadedBytes, uint64_t DownloadedBytes, uint64_t ElapsedNS) { m_SentBytes.fetch_add(UploadedBytes); diff --git a/src/zenremotestore/projectstore/jupiterremoteprojectstore.cpp b/src/zenremotestore/projectstore/jupiterremoteprojectstore.cpp index 462de2988..5b456cb4c 100644 --- a/src/zenremotestore/projectstore/jupiterremoteprojectstore.cpp +++ b/src/zenremotestore/projectstore/jupiterremoteprojectstore.cpp @@ -212,13 +212,43 @@ public: return Result; } + virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes, + BuildStorageCache* OptionalCache, + const Oid& CacheBuildId) override + { + ZEN_UNUSED(BlockHashes, OptionalCache, CacheBuildId); + return GetBlockDescriptionsResult{Result{.ErrorCode = int(HttpResponseCode::NotFound)}}; + } + virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash) override { - JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client(), m_AllowRedirect); - JupiterResult GetResult = Session.GetCompressedBlob(m_Namespace, RawHash, m_TempFilePath); + LoadAttachmentResult Result; + JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client(), m_AllowRedirect); + JupiterResult GetResult = Session.GetCompressedBlob(m_Namespace, RawHash, m_TempFilePath); + AddStats(GetResult); + + Result = {ConvertResult(GetResult), std::move(GetResult.Response)}; + if (GetResult.ErrorCode) + { + Result.Reason = fmt::format("Failed fetching oplog attachment from {}/{}/{}. 
Reason: '{}'", + m_JupiterClient->ServiceUrl(), + m_Namespace, + RawHash, + Result.Reason); + } + return Result; + } + + virtual LoadAttachmentRangesResult LoadAttachmentRanges(const IoHash& RawHash, + std::span<const std::pair<uint64_t, uint64_t>> Ranges) override + { + ZEN_ASSERT(!Ranges.empty()); + LoadAttachmentRangesResult Result; + JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client(), m_AllowRedirect); + JupiterResult GetResult = Session.GetCompressedBlob(m_Namespace, RawHash, m_TempFilePath); AddStats(GetResult); - LoadAttachmentResult Result{ConvertResult(GetResult), std::move(GetResult.Response)}; + Result = LoadAttachmentRangesResult{ConvertResult(GetResult), std::move(GetResult.Response)}; if (GetResult.ErrorCode) { Result.Reason = fmt::format("Failed fetching oplog attachment from {}/{}/{}. Reason: '{}'", @@ -227,6 +257,10 @@ public: RawHash, Result.Reason); } + else + { + Result.Ranges = std::vector<std::pair<uint64_t, uint64_t>>(Ranges.begin(), Ranges.end()); + } return Result; } @@ -247,8 +281,6 @@ public: return Result; } - virtual void Flush() override {} - private: LoadContainerResult LoadContainer(const IoHash& Key) { diff --git a/src/zenremotestore/projectstore/projectstoreoperations.cpp b/src/zenremotestore/projectstore/projectstoreoperations.cpp index becac3d4c..36dc4d868 100644 --- a/src/zenremotestore/projectstore/projectstoreoperations.cpp +++ b/src/zenremotestore/projectstore/projectstoreoperations.cpp @@ -426,19 +426,19 @@ ProjectStoreOperationDownloadAttachments::Execute() auto GetBuildBlob = [this](const IoHash& RawHash, const std::filesystem::path& OutputPath) { IoBuffer Payload; - if (m_Storage.BuildCacheStorage) + if (m_Storage.CacheStorage) { - Payload = m_Storage.BuildCacheStorage->GetBuildBlob(m_State.GetBuildId(), RawHash); + Payload = m_Storage.CacheStorage->GetBuildBlob(m_State.GetBuildId(), RawHash); } if (!Payload) { Payload = m_Storage.BuildStorage->GetBuildBlob(m_State.GetBuildId(), RawHash); - if 
(m_Storage.BuildCacheStorage && m_Options.PopulateCache) + if (m_Storage.CacheStorage && m_Options.PopulateCache) { - m_Storage.BuildCacheStorage->PutBuildBlob(m_State.GetBuildId(), - RawHash, - Payload.GetContentType(), - CompositeBuffer(SharedBuffer(Payload))); + m_Storage.CacheStorage->PutBuildBlob(m_State.GetBuildId(), + RawHash, + Payload.GetContentType(), + CompositeBuffer(SharedBuffer(Payload))); } } uint64_t PayloadSize = Payload.GetSize(); diff --git a/src/zenremotestore/projectstore/remoteprojectstore.cpp b/src/zenremotestore/projectstore/remoteprojectstore.cpp index 8be8eb0df..247bd6cb9 100644 --- a/src/zenremotestore/projectstore/remoteprojectstore.cpp +++ b/src/zenremotestore/projectstore/remoteprojectstore.cpp @@ -14,6 +14,8 @@ #include <zencore/trace.h> #include <zencore/workthreadpool.h> #include <zenhttp/httpcommon.h> +#include <zenremotestore/builds/buildstoragecache.h> +#include <zenremotestore/chunking/chunkedcontent.h> #include <zenremotestore/chunking/chunkedfile.h> #include <zenremotestore/operationlogoutput.h> #include <zenstore/cidstore.h> @@ -123,14 +125,17 @@ namespace remotestore_impl { return OptionalContext->IsCancelled(); } - std::string GetStats(const RemoteProjectStore::Stats& Stats, uint64_t ElapsedWallTimeMS) + std::string GetStats(const RemoteProjectStore::Stats& Stats, + const BuildStorageCache::Statistics* OptionalCacheStats, + uint64_t ElapsedWallTimeMS) { - return fmt::format( - "Sent: {} ({}bits/s) Recv: {} ({}bits/s)", - NiceBytes(Stats.m_SentBytes), - NiceNum(ElapsedWallTimeMS > 0u ? static_cast<uint64_t>((Stats.m_SentBytes * 8 * 1000) / ElapsedWallTimeMS) : 0u), - NiceBytes(Stats.m_ReceivedBytes), - NiceNum(ElapsedWallTimeMS > 0u ? static_cast<uint64_t>((Stats.m_ReceivedBytes * 8 * 1000) / ElapsedWallTimeMS) : 0u)); + uint64_t SentBytes = Stats.m_SentBytes + (OptionalCacheStats ? OptionalCacheStats->TotalBytesWritten.load() : 0); + uint64_t ReceivedBytes = Stats.m_ReceivedBytes + (OptionalCacheStats ? 
OptionalCacheStats->TotalBytesRead.load() : 0); + return fmt::format("Sent: {} ({}bits/s) Recv: {} ({}bits/s)", + NiceBytes(SentBytes), + NiceNum(ElapsedWallTimeMS > 0u ? static_cast<uint64_t>((SentBytes * 8 * 1000) / ElapsedWallTimeMS) : 0u), + NiceBytes(ReceivedBytes), + NiceNum(ElapsedWallTimeMS > 0u ? static_cast<uint64_t>((ReceivedBytes * 8 * 1000) / ElapsedWallTimeMS) : 0u)); } void LogRemoteStoreStatsDetails(const RemoteProjectStore::Stats& Stats) @@ -229,44 +234,66 @@ namespace remotestore_impl { struct DownloadInfo { - uint64_t OplogSizeBytes = 0; - std::atomic<uint64_t> AttachmentsDownloaded = 0; - std::atomic<uint64_t> AttachmentBlocksDownloaded = 0; - std::atomic<uint64_t> AttachmentBytesDownloaded = 0; - std::atomic<uint64_t> AttachmentBlockBytesDownloaded = 0; - std::atomic<uint64_t> AttachmentsStored = 0; - std::atomic<uint64_t> AttachmentBytesStored = 0; - std::atomic_size_t MissingAttachmentCount = 0; + uint64_t OplogSizeBytes = 0; + std::atomic<uint64_t> AttachmentsDownloaded = 0; + std::atomic<uint64_t> AttachmentBlocksDownloaded = 0; + std::atomic<uint64_t> AttachmentBlocksRangesDownloaded = 0; + std::atomic<uint64_t> AttachmentBytesDownloaded = 0; + std::atomic<uint64_t> AttachmentBlockBytesDownloaded = 0; + std::atomic<uint64_t> AttachmentBlockRangeBytesDownloaded = 0; + std::atomic<uint64_t> AttachmentsStored = 0; + std::atomic<uint64_t> AttachmentBytesStored = 0; + std::atomic_size_t MissingAttachmentCount = 0; }; - void DownloadAndSaveBlockChunks(CidStore& ChunkStore, - RemoteProjectStore& RemoteStore, - bool IgnoreMissingAttachments, - JobContext* OptionalContext, - WorkerThreadPool& NetworkWorkerPool, - WorkerThreadPool& WorkerPool, - Latch& AttachmentsDownloadLatch, - Latch& AttachmentsWriteLatch, - AsyncRemoteResult& RemoteResult, - DownloadInfo& Info, - Stopwatch& LoadAttachmentsTimer, - std::atomic_uint64_t& DownloadStartMS, - const std::vector<IoHash>& Chunks) + class JobContextLogOutput : public OperationLogOutput + { + public: + 
JobContextLogOutput(JobContext* OptionalContext) : m_OptionalContext(OptionalContext) {} + virtual void EmitLogMessage(const logging::LogPoint& Point, fmt::format_args Args) override + { + if (m_OptionalContext) + { + fmt::basic_memory_buffer<char, 250> MessageBuffer; + fmt::vformat_to(fmt::appender(MessageBuffer), Point.FormatString, Args); + remotestore_impl::ReportMessage(m_OptionalContext, std::string_view(MessageBuffer.data(), MessageBuffer.size())); + } + } + + virtual void SetLogOperationName(std::string_view Name) override { ZEN_UNUSED(Name); } + virtual void SetLogOperationProgress(uint32_t StepIndex, uint32_t StepCount) override { ZEN_UNUSED(StepIndex, StepCount); } + virtual uint32_t GetProgressUpdateDelayMS() override { return 0; } + virtual ProgressBar* CreateProgressBar(std::string_view InSubTask) override + { + ZEN_UNUSED(InSubTask); + return nullptr; + } + + private: + JobContext* m_OptionalContext; + }; + + void DownloadAndSaveBlockChunks(LoadOplogContext& Context, + Latch& AttachmentsDownloadLatch, + Latch& AttachmentsWriteLatch, + AsyncRemoteResult& RemoteResult, + DownloadInfo& Info, + Stopwatch& LoadAttachmentsTimer, + std::atomic_uint64_t& DownloadStartMS, + ThinChunkBlockDescription&& ThinBlockDescription, + std::vector<uint32_t>&& NeededChunkIndexes) { AttachmentsDownloadLatch.AddCount(1); - NetworkWorkerPool.ScheduleWork( - [&RemoteStore, - &ChunkStore, - &WorkerPool, + Context.NetworkWorkerPool.ScheduleWork( + [&Context, &AttachmentsDownloadLatch, &AttachmentsWriteLatch, &RemoteResult, - Chunks = Chunks, + ThinBlockDescription = std::move(ThinBlockDescription), + NeededChunkIndexes = std::move(NeededChunkIndexes), &Info, &LoadAttachmentsTimer, - &DownloadStartMS, - IgnoreMissingAttachments, - OptionalContext]() { + &DownloadStartMS]() { ZEN_TRACE_CPU("DownloadBlockChunks"); auto _ = MakeGuard([&AttachmentsDownloadLatch] { AttachmentsDownloadLatch.CountDown(); }); @@ -276,34 +303,47 @@ namespace remotestore_impl { } try { + 
std::vector<IoHash> Chunks; + Chunks.reserve(NeededChunkIndexes.size()); + for (uint32_t ChunkIndex : NeededChunkIndexes) + { + Chunks.push_back(ThinBlockDescription.ChunkRawHashes[ChunkIndex]); + } + uint64_t Unset = (std::uint64_t)-1; DownloadStartMS.compare_exchange_strong(Unset, LoadAttachmentsTimer.GetElapsedTimeMs()); - RemoteProjectStore::LoadAttachmentsResult Result = RemoteStore.LoadAttachments(Chunks); + RemoteProjectStore::LoadAttachmentsResult Result = Context.RemoteStore.LoadAttachments(Chunks); if (Result.ErrorCode) { - ReportMessage(OptionalContext, + ReportMessage(Context.OptionalJobContext, fmt::format("Failed to load attachments with {} chunks ({}): {}", Chunks.size(), RemoteResult.GetError(), RemoteResult.GetErrorReason())); Info.MissingAttachmentCount.fetch_add(1); - if (IgnoreMissingAttachments) + if (Context.IgnoreMissingAttachments) { RemoteResult.SetError(Result.ErrorCode, Result.Reason, Result.Text); } return; } - Info.AttachmentsDownloaded.fetch_add(Chunks.size()); - ZEN_INFO("Loaded {} bulk attachments in {}", - Chunks.size(), - NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000))); + Info.AttachmentsDownloaded.fetch_add(Result.Chunks.size()); + for (const auto& It : Result.Chunks) + { + uint64_t ChunkSize = It.second.GetCompressedSize(); + Info.AttachmentBytesDownloaded.fetch_add(ChunkSize); + } + remotestore_impl::ReportMessage(Context.OptionalJobContext, + fmt::format("Loaded {} bulk attachments in {}", + Chunks.size(), + NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000)))); if (RemoteResult.IsError()) { return; } AttachmentsWriteLatch.AddCount(1); - WorkerPool.ScheduleWork( - [&AttachmentsWriteLatch, &RemoteResult, &Info, &ChunkStore, Chunks = std::move(Result.Chunks)]() { + Context.WorkerPool.ScheduleWork( + [&AttachmentsWriteLatch, &RemoteResult, &Info, &Context, Chunks = std::move(Result.Chunks)]() { auto _ = MakeGuard([&AttachmentsWriteLatch] { AttachmentsWriteLatch.CountDown(); }); if 
(RemoteResult.IsError()) { @@ -320,13 +360,13 @@ namespace remotestore_impl { for (const auto& It : Chunks) { - uint64_t ChunkSize = It.second.GetCompressedSize(); - Info.AttachmentBytesDownloaded.fetch_add(ChunkSize); WriteAttachmentBuffers.push_back(It.second.GetCompressed().Flatten().AsIoBuffer()); WriteRawHashes.push_back(It.first); } std::vector<CidStore::InsertResult> InsertResults = - ChunkStore.AddChunks(WriteAttachmentBuffers, WriteRawHashes, CidStore::InsertMode::kCopyOnly); + Context.ChunkStore.AddChunks(WriteAttachmentBuffers, + WriteRawHashes, + CidStore::InsertMode::kCopyOnly); for (size_t Index = 0; Index < InsertResults.size(); Index++) { @@ -350,46 +390,38 @@ namespace remotestore_impl { catch (const std::exception& Ex) { RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("Failed to bulk load {} attachments", Chunks.size()), + fmt::format("Failed to bulk load {} attachments", NeededChunkIndexes.size()), Ex.what()); } }, WorkerThreadPool::EMode::EnableBacklog); }; - void DownloadAndSaveBlock(CidStore& ChunkStore, - RemoteProjectStore& RemoteStore, - bool IgnoreMissingAttachments, - JobContext* OptionalContext, - WorkerThreadPool& NetworkWorkerPool, - WorkerThreadPool& WorkerPool, - Latch& AttachmentsDownloadLatch, - Latch& AttachmentsWriteLatch, - AsyncRemoteResult& RemoteResult, - DownloadInfo& Info, - Stopwatch& LoadAttachmentsTimer, - std::atomic_uint64_t& DownloadStartMS, - const IoHash& BlockHash, - const std::vector<IoHash>& Chunks, - uint32_t RetriesLeft) + void DownloadAndSaveBlock(LoadOplogContext& Context, + Latch& AttachmentsDownloadLatch, + Latch& AttachmentsWriteLatch, + AsyncRemoteResult& RemoteResult, + DownloadInfo& Info, + Stopwatch& LoadAttachmentsTimer, + std::atomic_uint64_t& DownloadStartMS, + const IoHash& BlockHash, + const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& AllNeededPartialChunkHashesLookup, + std::span<std::atomic<bool>> ChunkDownloadedFlags, + uint32_t RetriesLeft) { 
AttachmentsDownloadLatch.AddCount(1); - NetworkWorkerPool.ScheduleWork( + Context.NetworkWorkerPool.ScheduleWork( [&AttachmentsDownloadLatch, &AttachmentsWriteLatch, - &ChunkStore, - &RemoteStore, - &NetworkWorkerPool, - &WorkerPool, - BlockHash, + &Context, &RemoteResult, &Info, &LoadAttachmentsTimer, &DownloadStartMS, - IgnoreMissingAttachments, - OptionalContext, RetriesLeft, - Chunks = std::vector<IoHash>(Chunks)]() { + BlockHash = IoHash(BlockHash), + &AllNeededPartialChunkHashesLookup, + ChunkDownloadedFlags]() { ZEN_TRACE_CPU("DownloadBlock"); auto _ = MakeGuard([&AttachmentsDownloadLatch] { AttachmentsDownloadLatch.CountDown(); }); @@ -401,51 +433,65 @@ namespace remotestore_impl { { uint64_t Unset = (std::uint64_t)-1; DownloadStartMS.compare_exchange_strong(Unset, LoadAttachmentsTimer.GetElapsedTimeMs()); - RemoteProjectStore::LoadAttachmentResult BlockResult = RemoteStore.LoadAttachment(BlockHash); - if (BlockResult.ErrorCode) + + IoBuffer BlobBuffer; + if (Context.OptionalCache) { - ReportMessage(OptionalContext, - fmt::format("Failed to download block attachment {} ({}): {}", - BlockHash, - RemoteResult.GetError(), - RemoteResult.GetErrorReason())); - Info.MissingAttachmentCount.fetch_add(1); - if (!IgnoreMissingAttachments) - { - RemoteResult.SetError(BlockResult.ErrorCode, BlockResult.Reason, BlockResult.Text); - } - return; + BlobBuffer = Context.OptionalCache->GetBuildBlob(Context.CacheBuildId, BlockHash); } - if (RemoteResult.IsError()) + + if (!BlobBuffer) { - return; + RemoteProjectStore::LoadAttachmentResult BlockResult = Context.RemoteStore.LoadAttachment(BlockHash); + if (BlockResult.ErrorCode) + { + ReportMessage(Context.OptionalJobContext, + fmt::format("Failed to download block attachment {} ({}): {}", + BlockHash, + BlockResult.Reason, + BlockResult.Text)); + Info.MissingAttachmentCount.fetch_add(1); + if (!Context.IgnoreMissingAttachments) + { + RemoteResult.SetError(BlockResult.ErrorCode, BlockResult.Reason, BlockResult.Text); + } + 
return; + } + if (RemoteResult.IsError()) + { + return; + } + BlobBuffer = std::move(BlockResult.Bytes); + ZEN_DEBUG("Loaded block attachment '{}' in {} ({})", + BlockHash, + NiceTimeSpanMs(static_cast<uint64_t>(BlockResult.ElapsedSeconds * 1000)), + NiceBytes(BlobBuffer.Size())); + if (Context.OptionalCache && Context.PopulateCache) + { + Context.OptionalCache->PutBuildBlob(Context.CacheBuildId, + BlockHash, + BlobBuffer.GetContentType(), + CompositeBuffer(SharedBuffer(BlobBuffer))); + } } - uint64_t BlockSize = BlockResult.Bytes.GetSize(); + uint64_t BlockSize = BlobBuffer.GetSize(); Info.AttachmentBlocksDownloaded.fetch_add(1); - ZEN_INFO("Loaded block attachment '{}' in {} ({})", - BlockHash, - NiceTimeSpanMs(static_cast<uint64_t>(BlockResult.ElapsedSeconds * 1000)), - NiceBytes(BlockSize)); Info.AttachmentBlockBytesDownloaded.fetch_add(BlockSize); AttachmentsWriteLatch.AddCount(1); - WorkerPool.ScheduleWork( + Context.WorkerPool.ScheduleWork( [&AttachmentsDownloadLatch, &AttachmentsWriteLatch, - &ChunkStore, - &RemoteStore, - &NetworkWorkerPool, - &WorkerPool, - BlockHash, + &Context, &RemoteResult, &Info, &LoadAttachmentsTimer, &DownloadStartMS, - IgnoreMissingAttachments, - OptionalContext, RetriesLeft, - Chunks = std::move(Chunks), - Bytes = std::move(BlockResult.Bytes)]() { + BlockHash = IoHash(BlockHash), + &AllNeededPartialChunkHashesLookup, + ChunkDownloadedFlags, + Bytes = std::move(BlobBuffer)]() { auto _ = MakeGuard([&AttachmentsWriteLatch] { AttachmentsWriteLatch.CountDown(); }); if (RemoteResult.IsError()) { @@ -454,64 +500,107 @@ namespace remotestore_impl { try { ZEN_ASSERT(Bytes.Size() > 0); - std::unordered_set<IoHash, IoHash::Hasher> WantedChunks; - WantedChunks.reserve(Chunks.size()); - WantedChunks.insert(Chunks.begin(), Chunks.end()); std::vector<IoBuffer> WriteAttachmentBuffers; std::vector<IoHash> WriteRawHashes; IoHash RawHash; uint64_t RawSize; CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(Bytes), RawHash, 
RawSize); + + std::string ErrorString; + if (!Compressed) { - if (RetriesLeft > 0) + ErrorString = + fmt::format("Block attachment {} is malformed, can't parse as compressed binary", BlockHash); + } + else if (RawHash != BlockHash) + { + ErrorString = fmt::format("Block attachment {} has mismatching raw hash ({})", BlockHash, RawHash); + } + else if (CompositeBuffer BlockPayload = Compressed.DecompressToComposite(); !BlockPayload) + { + ErrorString = fmt::format("Block attachment {} is malformed, can't decompress payload", BlockHash); + } + else + { + uint64_t PotentialSize = 0; + uint64_t UsedSize = 0; + uint64_t BlockSize = BlockPayload.GetSize(); + + uint64_t BlockHeaderSize = 0; + + bool StoreChunksOK = IterateChunkBlock( + BlockPayload.Flatten(), + [&AllNeededPartialChunkHashesLookup, + &ChunkDownloadedFlags, + &WriteAttachmentBuffers, + &WriteRawHashes, + &Info, + &PotentialSize](CompressedBuffer&& Chunk, const IoHash& AttachmentRawHash) { + auto ChunkIndexIt = AllNeededPartialChunkHashesLookup.find(AttachmentRawHash); + if (ChunkIndexIt != AllNeededPartialChunkHashesLookup.end()) + { + bool Expected = false; + if (ChunkDownloadedFlags[ChunkIndexIt->second].compare_exchange_strong(Expected, true)) + { + WriteAttachmentBuffers.emplace_back(Chunk.GetCompressed().Flatten().AsIoBuffer()); + IoHash RawHash; + uint64_t RawSize; + ZEN_ASSERT(CompressedBuffer::ValidateCompressedHeader( + WriteAttachmentBuffers.back(), + RawHash, + RawSize, + /*OutOptionalTotalCompressedSize*/ nullptr)); + ZEN_ASSERT(RawHash == AttachmentRawHash); + WriteRawHashes.emplace_back(AttachmentRawHash); + PotentialSize += WriteAttachmentBuffers.back().GetSize(); + } + } + }, + BlockHeaderSize); + + if (!StoreChunksOK) { - ReportMessage( - OptionalContext, - fmt::format( - "Block attachment {} is malformed, can't parse as compressed binary, retrying download", - BlockHash)); - return DownloadAndSaveBlock(ChunkStore, - RemoteStore, - IgnoreMissingAttachments, - OptionalContext, - 
NetworkWorkerPool, - WorkerPool, - AttachmentsDownloadLatch, - AttachmentsWriteLatch, - RemoteResult, - Info, - LoadAttachmentsTimer, - DownloadStartMS, - BlockHash, - std::move(Chunks), - RetriesLeft - 1); + ErrorString = fmt::format("Invalid format for block {}", BlockHash); + } + else + { + if (!WriteAttachmentBuffers.empty()) + { + std::vector<CidStore::InsertResult> Results = + Context.ChunkStore.AddChunks(WriteAttachmentBuffers, WriteRawHashes); + for (size_t Index = 0; Index < Results.size(); Index++) + { + const CidStore::InsertResult& Result = Results[Index]; + if (Result.New) + { + Info.AttachmentBytesStored.fetch_add(WriteAttachmentBuffers[Index].GetSize()); + Info.AttachmentsStored.fetch_add(1); + UsedSize += WriteAttachmentBuffers[Index].GetSize(); + } + } + if (UsedSize < BlockSize) + { + ZEN_DEBUG("Used {} (skipping {}) out of {} for block {} ({} %) (use of matching {}%)", + NiceBytes(UsedSize), + NiceBytes(BlockSize - UsedSize), + NiceBytes(BlockSize), + BlockHash, + (100 * UsedSize) / BlockSize, + PotentialSize > 0 ? 
(UsedSize * 100) / PotentialSize : 0); + } + } } - ReportMessage( - OptionalContext, - fmt::format("Block attachment {} is malformed, can't parse as compressed binary", BlockHash)); - RemoteResult.SetError( - gsl::narrow<int32_t>(HttpResponseCode::InternalServerError), - fmt::format("Block attachment {} is malformed, can't parse as compressed binary", BlockHash), - {}); - return; } - CompositeBuffer BlockPayload = Compressed.DecompressToComposite(); - if (!BlockPayload) + + if (!ErrorString.empty()) { if (RetriesLeft > 0) { - ReportMessage( - OptionalContext, - fmt::format("Block attachment {} is malformed, can't decompress payload, retrying download", - BlockHash)); - return DownloadAndSaveBlock(ChunkStore, - RemoteStore, - IgnoreMissingAttachments, - OptionalContext, - NetworkWorkerPool, - WorkerPool, + ReportMessage(Context.OptionalJobContext, fmt::format("{}, retrying download", ErrorString)); + + return DownloadAndSaveBlock(Context, AttachmentsDownloadLatch, AttachmentsWriteLatch, RemoteResult, @@ -519,91 +608,16 @@ namespace remotestore_impl { LoadAttachmentsTimer, DownloadStartMS, BlockHash, - std::move(Chunks), + AllNeededPartialChunkHashesLookup, + ChunkDownloadedFlags, RetriesLeft - 1); } - ReportMessage(OptionalContext, - fmt::format("Block attachment {} is malformed, can't decompress payload", BlockHash)); - RemoteResult.SetError( - gsl::narrow<int32_t>(HttpResponseCode::InternalServerError), - fmt::format("Block attachment {} is malformed, can't decompress payload", BlockHash), - {}); - return; - } - if (RawHash != BlockHash) - { - ReportMessage(OptionalContext, - fmt::format("Block attachment {} has mismatching raw hash ({})", BlockHash, RawHash)); - RemoteResult.SetError( - gsl::narrow<int32_t>(HttpResponseCode::InternalServerError), - fmt::format("Block attachment {} has mismatching raw hash ({})", BlockHash, RawHash), - {}); - return; - } - - uint64_t PotentialSize = 0; - uint64_t UsedSize = 0; - uint64_t BlockSize = BlockPayload.GetSize(); - - 
uint64_t BlockHeaderSize = 0; - bool StoreChunksOK = IterateChunkBlock( - BlockPayload.Flatten(), - [&WantedChunks, &WriteAttachmentBuffers, &WriteRawHashes, &Info, &PotentialSize]( - CompressedBuffer&& Chunk, - const IoHash& AttachmentRawHash) { - if (WantedChunks.contains(AttachmentRawHash)) - { - WriteAttachmentBuffers.emplace_back(Chunk.GetCompressed().Flatten().AsIoBuffer()); - IoHash RawHash; - uint64_t RawSize; - ZEN_ASSERT( - CompressedBuffer::ValidateCompressedHeader(WriteAttachmentBuffers.back(), - RawHash, - RawSize, - /*OutOptionalTotalCompressedSize*/ nullptr)); - ZEN_ASSERT(RawHash == AttachmentRawHash); - WriteRawHashes.emplace_back(AttachmentRawHash); - WantedChunks.erase(AttachmentRawHash); - PotentialSize += WriteAttachmentBuffers.back().GetSize(); - } - }, - BlockHeaderSize); - - if (!StoreChunksOK) - { - ReportMessage(OptionalContext, - fmt::format("Block attachment {} has invalid format ({}): {}", - BlockHash, - RemoteResult.GetError(), - RemoteResult.GetErrorReason())); - RemoteResult.SetError(gsl::narrow<int32_t>(HttpResponseCode::InternalServerError), - fmt::format("Invalid format for block {}", BlockHash), - {}); - return; - } - - ZEN_ASSERT(WantedChunks.empty()); - - if (!WriteAttachmentBuffers.empty()) - { - auto Results = ChunkStore.AddChunks(WriteAttachmentBuffers, WriteRawHashes); - for (size_t Index = 0; Index < Results.size(); Index++) + else { - const auto& Result = Results[Index]; - if (Result.New) - { - Info.AttachmentBytesStored.fetch_add(WriteAttachmentBuffers[Index].GetSize()); - Info.AttachmentsStored.fetch_add(1); - UsedSize += WriteAttachmentBuffers[Index].GetSize(); - } + ReportMessage(Context.OptionalJobContext, ErrorString); + RemoteResult.SetError(gsl::narrow<int32_t>(HttpResponseCode::InternalServerError), ErrorString, {}); + return; } - ZEN_DEBUG("Used {} (matching {}) out of {} for block {} ({} %) (use of matching {}%)", - NiceBytes(UsedSize), - NiceBytes(PotentialSize), - NiceBytes(BlockSize), - BlockHash, - (100 * 
UsedSize) / BlockSize, - PotentialSize > 0 ? (UsedSize * 100) / PotentialSize : 0); } } catch (const std::exception& Ex) @@ -618,19 +632,458 @@ namespace remotestore_impl { catch (const std::exception& Ex) { RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("Failed to block attachment {}", BlockHash), + fmt::format("Failed to download block attachment {}", BlockHash), + Ex.what()); + } + }, + WorkerThreadPool::EMode::EnableBacklog); + }; + + void DownloadPartialBlock(LoadOplogContext& Context, + AsyncRemoteResult& RemoteResult, + DownloadInfo& Info, + double& DownloadTimeSeconds, + const ChunkBlockDescription& BlockDescription, + bool BlockExistsInCache, + std::span<const ChunkBlockAnalyser::BlockRangeDescriptor> BlockRangeDescriptors, + size_t BlockRangeIndexStart, + size_t BlockRangeCount, + std::function<void(IoBuffer&& Buffer, + size_t BlockRangeStartIndex, + std::span<const std::pair<uint64_t, uint64_t>> OffsetAndLengths)>&& OnDownloaded) + { + ZEN_ASSERT(Context.StoreMaxRangeCountPerRequest != 0); + ZEN_ASSERT(BlockExistsInCache == false || Context.CacheMaxRangeCountPerRequest != 0); + + std::vector<std::pair<uint64_t, uint64_t>> Ranges; + Ranges.reserve(BlockRangeDescriptors.size()); + for (size_t BlockRangeIndex = BlockRangeIndexStart; BlockRangeIndex < BlockRangeIndexStart + BlockRangeCount; BlockRangeIndex++) + { + const ChunkBlockAnalyser::BlockRangeDescriptor& BlockRange = BlockRangeDescriptors[BlockRangeIndex]; + Ranges.push_back(std::make_pair(BlockRange.RangeStart, BlockRange.RangeLength)); + } + + size_t SubBlockRangeCount = BlockRangeCount; + size_t SubRangeCountComplete = 0; + std::span<const std::pair<uint64_t, uint64_t>> RangesSpan(Ranges); + + while (SubRangeCountComplete < SubBlockRangeCount) + { + if (RemoteResult.IsError()) + { + break; + } + + size_t SubRangeStartIndex = BlockRangeIndexStart + SubRangeCountComplete; + if (BlockExistsInCache) + { + ZEN_ASSERT(Context.OptionalCache); + size_t 
SubRangeCount = Min(BlockRangeCount - SubRangeCountComplete, Context.CacheMaxRangeCountPerRequest); + + if (SubRangeCount == 1) + { + // Legacy single-range path, prefer that for max compatibility + + const std::pair<uint64_t, uint64_t> SubRange = RangesSpan[SubRangeCountComplete]; + Stopwatch CacheTimer; + IoBuffer PayloadBuffer = Context.OptionalCache->GetBuildBlob(Context.CacheBuildId, + BlockDescription.BlockHash, + SubRange.first, + SubRange.second); + DownloadTimeSeconds += CacheTimer.GetElapsedTimeMs() / 1000.0; + if (RemoteResult.IsError()) + { + break; + } + if (PayloadBuffer) + { + OnDownloaded(std::move(PayloadBuffer), + SubRangeStartIndex, + std::vector<std::pair<uint64_t, uint64_t>>{std::make_pair(0u, SubRange.second)}); + SubRangeCountComplete += SubRangeCount; + continue; + } + } + else + { + auto SubRanges = RangesSpan.subspan(SubRangeCountComplete, SubRangeCount); + + Stopwatch CacheTimer; + BuildStorageCache::BuildBlobRanges RangeBuffers = + Context.OptionalCache->GetBuildBlobRanges(Context.CacheBuildId, BlockDescription.BlockHash, SubRanges); + DownloadTimeSeconds += CacheTimer.GetElapsedTimeMs() / 1000.0; + if (RemoteResult.IsError()) + { + break; + } + if (RangeBuffers.PayloadBuffer) + { + if (RangeBuffers.Ranges.empty()) + { + SubRangeCount = Ranges.size() - SubRangeCountComplete; + OnDownloaded(std::move(RangeBuffers.PayloadBuffer), + SubRangeStartIndex, + RangesSpan.subspan(SubRangeCountComplete, SubRangeCount)); + SubRangeCountComplete += SubRangeCount; + continue; + } + else if (RangeBuffers.Ranges.size() == SubRangeCount) + { + OnDownloaded(std::move(RangeBuffers.PayloadBuffer), SubRangeStartIndex, RangeBuffers.Ranges); + SubRangeCountComplete += SubRangeCount; + continue; + } + } + } + } + + size_t SubRangeCount = Min(BlockRangeCount - SubRangeCountComplete, Context.StoreMaxRangeCountPerRequest); + + auto SubRanges = RangesSpan.subspan(SubRangeCountComplete, SubRangeCount); + + RemoteProjectStore::LoadAttachmentRangesResult BlockResult = 
+ Context.RemoteStore.LoadAttachmentRanges(BlockDescription.BlockHash, SubRanges); + DownloadTimeSeconds += BlockResult.ElapsedSeconds; + if (RemoteResult.IsError()) + { + break; + } + if (BlockResult.ErrorCode || !BlockResult.Bytes) + { + ReportMessage(Context.OptionalJobContext, + fmt::format("Failed to download {} ranges from block attachment '{}' ({}): {}", + SubRanges.size(), + BlockDescription.BlockHash, + BlockResult.ErrorCode, + BlockResult.Reason)); + Info.MissingAttachmentCount.fetch_add(1); + if (!Context.IgnoreMissingAttachments) + { + RemoteResult.SetError(BlockResult.ErrorCode, BlockResult.Reason, BlockResult.Text); + break; + } + } + else + { + if (BlockResult.Ranges.empty()) + { + // Jupiter will ignore the ranges and send the whole payload if it fetches the payload from S3 + // Use the whole payload for the remaining ranges + + if (Context.OptionalCache && Context.PopulateCache) + { + Context.OptionalCache->PutBuildBlob(Context.CacheBuildId, + BlockDescription.BlockHash, + ZenContentType::kCompressedBinary, + CompositeBuffer(std::vector<IoBuffer>{BlockResult.Bytes})); + if (RemoteResult.IsError()) + { + break; + } + } + SubRangeCount = Ranges.size() - SubRangeCountComplete; + OnDownloaded(std::move(BlockResult.Bytes), + SubRangeStartIndex, + RangesSpan.subspan(SubRangeCountComplete, SubRangeCount)); + } + else + { + if (BlockResult.Ranges.size() != SubRanges.size()) + { + RemoteResult.SetError(gsl::narrow<int32_t>(HttpResponseCode::InternalServerError), + fmt::format("Range response for block {} contains {} ranges, expected {} ranges", + BlockDescription.BlockHash, + BlockResult.Ranges.size(), + SubRanges.size()), + ""); + break; + } + OnDownloaded(std::move(BlockResult.Bytes), SubRangeStartIndex, BlockResult.Ranges); + } + } + + SubRangeCountComplete += SubRangeCount; + } + } + + void DownloadAndSavePartialBlock(LoadOplogContext& Context, + Latch& AttachmentsDownloadLatch, + Latch& AttachmentsWriteLatch, + AsyncRemoteResult& RemoteResult, + 
DownloadInfo& Info, + Stopwatch& LoadAttachmentsTimer, + std::atomic_uint64_t& DownloadStartMS, + const ChunkBlockDescription& BlockDescription, + bool BlockExistsInCache, + std::span<const ChunkBlockAnalyser::BlockRangeDescriptor> BlockRangeDescriptors, + size_t BlockRangeIndexStart, + size_t BlockRangeCount, + const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& AllNeededPartialChunkHashesLookup, + std::span<std::atomic<bool>> ChunkDownloadedFlags, + uint32_t RetriesLeft) + { + AttachmentsDownloadLatch.AddCount(1); + Context.NetworkWorkerPool.ScheduleWork( + [&AttachmentsDownloadLatch, + &AttachmentsWriteLatch, + &Context, + &RemoteResult, + &Info, + &LoadAttachmentsTimer, + &DownloadStartMS, + BlockDescription, + BlockExistsInCache, + BlockRangeDescriptors, + BlockRangeIndexStart, + BlockRangeCount, + &AllNeededPartialChunkHashesLookup, + ChunkDownloadedFlags, + RetriesLeft]() { + ZEN_TRACE_CPU("DownloadBlockRanges"); + + auto _ = MakeGuard([&AttachmentsDownloadLatch] { AttachmentsDownloadLatch.CountDown(); }); + try + { + uint64_t Unset = (std::uint64_t)-1; + DownloadStartMS.compare_exchange_strong(Unset, LoadAttachmentsTimer.GetElapsedTimeMs()); + + double DownloadElapsedSeconds = 0; + uint64_t DownloadedBytes = 0; + + DownloadPartialBlock( + Context, + RemoteResult, + Info, + DownloadElapsedSeconds, + BlockDescription, + BlockExistsInCache, + BlockRangeDescriptors, + BlockRangeIndexStart, + BlockRangeCount, + [&](IoBuffer&& Buffer, + size_t BlockRangeStartIndex, + std::span<const std::pair<uint64_t, uint64_t>> OffsetAndLengths) { + uint64_t BlockPartSize = Buffer.GetSize(); + DownloadedBytes += BlockPartSize; + + Info.AttachmentBlockRangeBytesDownloaded.fetch_add(BlockPartSize); + Info.AttachmentBlocksRangesDownloaded++; + + AttachmentsWriteLatch.AddCount(1); + Context.WorkerPool.ScheduleWork( + [&AttachmentsWriteLatch, + &Context, + &AttachmentsDownloadLatch, + &RemoteResult, + &Info, + &LoadAttachmentsTimer, + &DownloadStartMS, + BlockDescription, + 
BlockExistsInCache, + BlockRangeDescriptors, + BlockRangeStartIndex, + &AllNeededPartialChunkHashesLookup, + ChunkDownloadedFlags, + RetriesLeft, + BlockPayload = std::move(Buffer), + OffsetAndLengths = + std::vector<std::pair<uint64_t, uint64_t>>(OffsetAndLengths.begin(), OffsetAndLengths.end())]() { + auto _ = MakeGuard([&AttachmentsWriteLatch] { AttachmentsWriteLatch.CountDown(); }); + try + { + ZEN_ASSERT(BlockPayload.Size() > 0); + + size_t RangeCount = OffsetAndLengths.size(); + for (size_t RangeOffset = 0; RangeOffset < RangeCount; RangeOffset++) + { + if (RemoteResult.IsError()) + { + return; + } + + const ChunkBlockAnalyser::BlockRangeDescriptor& BlockRange = + BlockRangeDescriptors[BlockRangeStartIndex + RangeOffset]; + const std::pair<uint64_t, uint64_t>& OffsetAndLength = OffsetAndLengths[RangeOffset]; + IoBuffer BlockRangeBuffer(BlockPayload, OffsetAndLength.first, OffsetAndLength.second); + + std::vector<IoBuffer> WriteAttachmentBuffers; + std::vector<IoHash> WriteRawHashes; + + uint64_t PotentialSize = 0; + uint64_t UsedSize = 0; + uint64_t BlockPartSize = BlockRangeBuffer.GetSize(); + + uint32_t OffsetInBlock = 0; + for (uint32_t ChunkBlockIndex = BlockRange.ChunkBlockIndexStart; + ChunkBlockIndex < BlockRange.ChunkBlockIndexStart + BlockRange.ChunkBlockIndexCount; + ChunkBlockIndex++) + { + if (RemoteResult.IsError()) + { + break; + } + + const uint32_t ChunkCompressedSize = + BlockDescription.ChunkCompressedLengths[ChunkBlockIndex]; + const IoHash& ChunkHash = BlockDescription.ChunkRawHashes[ChunkBlockIndex]; + + if (auto ChunkIndexIt = AllNeededPartialChunkHashesLookup.find(ChunkHash); + ChunkIndexIt != AllNeededPartialChunkHashesLookup.end()) + { + if (!ChunkDownloadedFlags[ChunkIndexIt->second]) + { + IoHash VerifyChunkHash; + uint64_t VerifyChunkSize; + CompressedBuffer CompressedChunk = CompressedBuffer::FromCompressed( + SharedBuffer(IoBuffer(BlockRangeBuffer, OffsetInBlock, ChunkCompressedSize)), + VerifyChunkHash, + VerifyChunkSize); + + 
std::string ErrorString; + + if (!CompressedChunk) + { + ErrorString = fmt::format( + "Chunk at {},{} in block attachment '{}' is not a valid compressed buffer", + OffsetInBlock, + ChunkCompressedSize, + BlockDescription.BlockHash); + } + else if (VerifyChunkHash != ChunkHash) + { + ErrorString = fmt::format( + "Chunk at {},{} in block attachment '{}' has mismatching hash, expected " + "{}, got {}", + OffsetInBlock, + ChunkCompressedSize, + BlockDescription.BlockHash, + ChunkHash, + VerifyChunkHash); + } + else if (VerifyChunkSize != BlockDescription.ChunkRawLengths[ChunkBlockIndex]) + { + ErrorString = fmt::format( + "Chunk at {},{} in block attachment '{}' has mismatching raw size, " + "expected {}, " + "got {}", + OffsetInBlock, + ChunkCompressedSize, + BlockDescription.BlockHash, + BlockDescription.ChunkRawLengths[ChunkBlockIndex], + VerifyChunkSize); + } + + if (!ErrorString.empty()) + { + if (RetriesLeft > 0) + { + ReportMessage(Context.OptionalJobContext, + fmt::format("{}, retrying download", ErrorString)); + return DownloadAndSavePartialBlock(Context, + AttachmentsDownloadLatch, + AttachmentsWriteLatch, + RemoteResult, + Info, + LoadAttachmentsTimer, + DownloadStartMS, + BlockDescription, + BlockExistsInCache, + BlockRangeDescriptors, + BlockRangeStartIndex, + RangeCount, + AllNeededPartialChunkHashesLookup, + ChunkDownloadedFlags, + RetriesLeft - 1); + } + + ReportMessage(Context.OptionalJobContext, ErrorString); + Info.MissingAttachmentCount.fetch_add(1); + if (!Context.IgnoreMissingAttachments) + { + RemoteResult.SetError(gsl::narrow<int32_t>(HttpResponseCode::NotFound), + "Malformed chunk block", + ErrorString); + } + } + else + { + bool Expected = false; + if (ChunkDownloadedFlags[ChunkIndexIt->second].compare_exchange_strong(Expected, + true)) + { + WriteAttachmentBuffers.emplace_back( + CompressedChunk.GetCompressed().Flatten().AsIoBuffer()); + WriteRawHashes.emplace_back(ChunkHash); + PotentialSize += WriteAttachmentBuffers.back().GetSize(); + } + 
} + } + } + OffsetInBlock += ChunkCompressedSize; + } + + if (!WriteAttachmentBuffers.empty()) + { + std::vector<CidStore::InsertResult> Results = + Context.ChunkStore.AddChunks(WriteAttachmentBuffers, WriteRawHashes); + for (size_t Index = 0; Index < Results.size(); Index++) + { + const CidStore::InsertResult& Result = Results[Index]; + if (Result.New) + { + Info.AttachmentBytesStored.fetch_add(WriteAttachmentBuffers[Index].GetSize()); + Info.AttachmentsStored.fetch_add(1); + UsedSize += WriteAttachmentBuffers[Index].GetSize(); + } + } + if (UsedSize < BlockPartSize) + { + ZEN_DEBUG( + "Used {} (skipping {}) out of {} for block {} range {}, {} ({} %) (use of matching " + "{}%)", + NiceBytes(UsedSize), + NiceBytes(BlockPartSize - UsedSize), + NiceBytes(BlockPartSize), + BlockDescription.BlockHash, + BlockRange.RangeStart, + BlockRange.RangeLength, + (100 * UsedSize) / BlockPartSize, + PotentialSize > 0 ? (UsedSize * 100) / PotentialSize : 0); + } + } + } + } + catch (const std::exception& Ex) + { + RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), + fmt::format("Failed saving {} ranges from block attachment {}", + OffsetAndLengths.size(), + BlockDescription.BlockHash), + Ex.what()); + } + }, + WorkerThreadPool::EMode::EnableBacklog); + }); + if (!RemoteResult.IsError()) + { + ZEN_DEBUG("Loaded {} ranges from block attachment '{}' in {} ({})", + BlockRangeCount, + BlockDescription.BlockHash, + NiceTimeSpanMs(static_cast<uint64_t>(DownloadElapsedSeconds * 1000)), + NiceBytes(DownloadedBytes)); + } + } + catch (const std::exception& Ex) + { + RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), + fmt::format("Failed to download block attachment {} ranges", BlockDescription.BlockHash), Ex.what()); } }, WorkerThreadPool::EMode::EnableBacklog); }; - void DownloadAndSaveAttachment(CidStore& ChunkStore, - RemoteProjectStore& RemoteStore, - bool IgnoreMissingAttachments, - JobContext* OptionalContext, - 
WorkerThreadPool& NetworkWorkerPool, - WorkerThreadPool& WorkerPool, + void DownloadAndSaveAttachment(LoadOplogContext& Context, Latch& AttachmentsDownloadLatch, Latch& AttachmentsWriteLatch, AsyncRemoteResult& RemoteResult, @@ -640,19 +1093,15 @@ namespace remotestore_impl { const IoHash& RawHash) { AttachmentsDownloadLatch.AddCount(1); - NetworkWorkerPool.ScheduleWork( - [&RemoteStore, - &ChunkStore, - &WorkerPool, + Context.NetworkWorkerPool.ScheduleWork( + [&Context, &RemoteResult, &AttachmentsDownloadLatch, &AttachmentsWriteLatch, RawHash, &LoadAttachmentsTimer, &DownloadStartMS, - &Info, - IgnoreMissingAttachments, - OptionalContext]() { + &Info]() { ZEN_TRACE_CPU("DownloadAttachment"); auto _ = MakeGuard([&AttachmentsDownloadLatch] { AttachmentsDownloadLatch.CountDown(); }); @@ -664,43 +1113,52 @@ namespace remotestore_impl { { uint64_t Unset = (std::uint64_t)-1; DownloadStartMS.compare_exchange_strong(Unset, LoadAttachmentsTimer.GetElapsedTimeMs()); - RemoteProjectStore::LoadAttachmentResult AttachmentResult = RemoteStore.LoadAttachment(RawHash); - if (AttachmentResult.ErrorCode) + IoBuffer BlobBuffer; + if (Context.OptionalCache) { - ReportMessage(OptionalContext, - fmt::format("Failed to download large attachment {}: '{}', error code : {}", - RawHash, - AttachmentResult.Reason, - AttachmentResult.ErrorCode)); - Info.MissingAttachmentCount.fetch_add(1); - if (!IgnoreMissingAttachments) + BlobBuffer = Context.OptionalCache->GetBuildBlob(Context.CacheBuildId, RawHash); + } + if (!BlobBuffer) + { + RemoteProjectStore::LoadAttachmentResult AttachmentResult = Context.RemoteStore.LoadAttachment(RawHash); + if (AttachmentResult.ErrorCode) { - RemoteResult.SetError(AttachmentResult.ErrorCode, AttachmentResult.Reason, AttachmentResult.Text); + ReportMessage(Context.OptionalJobContext, + fmt::format("Failed to download large attachment {}: '{}', error code : {}", + RawHash, + AttachmentResult.Reason, + AttachmentResult.ErrorCode)); + 
Info.MissingAttachmentCount.fetch_add(1); + if (!Context.IgnoreMissingAttachments) + { + RemoteResult.SetError(AttachmentResult.ErrorCode, AttachmentResult.Reason, AttachmentResult.Text); + } + return; + } + BlobBuffer = std::move(AttachmentResult.Bytes); + ZEN_DEBUG("Loaded large attachment '{}' in {} ({})", + RawHash, + NiceTimeSpanMs(static_cast<uint64_t>(AttachmentResult.ElapsedSeconds * 1000)), + NiceBytes(BlobBuffer.GetSize())); + if (Context.OptionalCache && Context.PopulateCache) + { + Context.OptionalCache->PutBuildBlob(Context.CacheBuildId, + RawHash, + BlobBuffer.GetContentType(), + CompositeBuffer(SharedBuffer(BlobBuffer))); } - return; } - uint64_t AttachmentSize = AttachmentResult.Bytes.GetSize(); - ZEN_INFO("Loaded large attachment '{}' in {} ({})", - RawHash, - NiceTimeSpanMs(static_cast<uint64_t>(AttachmentResult.ElapsedSeconds * 1000)), - NiceBytes(AttachmentSize)); - Info.AttachmentsDownloaded.fetch_add(1); if (RemoteResult.IsError()) { return; } + uint64_t AttachmentSize = BlobBuffer.GetSize(); + Info.AttachmentsDownloaded.fetch_add(1); Info.AttachmentBytesDownloaded.fetch_add(AttachmentSize); AttachmentsWriteLatch.AddCount(1); - WorkerPool.ScheduleWork( - [&AttachmentsWriteLatch, - &RemoteResult, - &Info, - &ChunkStore, - RawHash, - AttachmentSize, - Bytes = std::move(AttachmentResult.Bytes), - OptionalContext]() { + Context.WorkerPool.ScheduleWork( + [&Context, &AttachmentsWriteLatch, &RemoteResult, &Info, RawHash, AttachmentSize, Bytes = std::move(BlobBuffer)]() { ZEN_TRACE_CPU("WriteAttachment"); auto _ = MakeGuard([&AttachmentsWriteLatch] { AttachmentsWriteLatch.CountDown(); }); @@ -710,7 +1168,7 @@ namespace remotestore_impl { } try { - CidStore::InsertResult InsertResult = ChunkStore.AddChunk(Bytes, RawHash); + CidStore::InsertResult InsertResult = Context.ChunkStore.AddChunk(Bytes, RawHash); if (InsertResult.New) { Info.AttachmentBytesStored.fetch_add(AttachmentSize); @@ -1126,7 +1584,9 @@ namespace remotestore_impl { uint64_t 
PartialTransferWallTimeMS = Timer.GetElapsedTimeMs(); ReportProgress(OptionalContext, "Saving attachments"sv, - fmt::format("{} remaining... {}", Remaining, GetStats(RemoteStore.GetStats(), PartialTransferWallTimeMS)), + fmt::format("{} remaining... {}", + Remaining, + GetStats(RemoteStore.GetStats(), /*OptionalCacheStats*/ nullptr, PartialTransferWallTimeMS)), AttachmentsToSave, Remaining); } @@ -1135,7 +1595,7 @@ namespace remotestore_impl { { ReportProgress(OptionalContext, "Saving attachments"sv, - fmt::format("{}", GetStats(RemoteStore.GetStats(), ElapsedTimeMS)), + fmt::format("{}", GetStats(RemoteStore.GetStats(), /*OptionalCacheStats*/ nullptr, ElapsedTimeMS)), AttachmentsToSave, 0); } @@ -1146,7 +1606,7 @@ namespace remotestore_impl { LargeAttachmentCountToUpload, BulkAttachmentCountToUpload, NiceTimeSpanMs(ElapsedTimeMS), - GetStats(RemoteStore.GetStats(), ElapsedTimeMS))); + GetStats(RemoteStore.GetStats(), /*OptionalCacheStats*/ nullptr, ElapsedTimeMS))); } } // namespace remotestore_impl @@ -1224,35 +1684,7 @@ BuildContainer(CidStore& ChunkStore, { using namespace std::literals; - class JobContextLogOutput : public OperationLogOutput - { - public: - JobContextLogOutput(JobContext* OptionalContext) : m_OptionalContext(OptionalContext) {} - virtual void EmitLogMessage(int LogLevel, std::string_view Format, fmt::format_args Args) override - { - ZEN_UNUSED(LogLevel); - if (m_OptionalContext) - { - fmt::basic_memory_buffer<char, 250> MessageBuffer; - fmt::vformat_to(fmt::appender(MessageBuffer), Format, Args); - remotestore_impl::ReportMessage(m_OptionalContext, std::string_view(MessageBuffer.data(), MessageBuffer.size())); - } - } - - virtual void SetLogOperationName(std::string_view Name) override { ZEN_UNUSED(Name); } - virtual void SetLogOperationProgress(uint32_t StepIndex, uint32_t StepCount) override { ZEN_UNUSED(StepIndex, StepCount); } - virtual uint32_t GetProgressUpdateDelayMS() override { return 0; } - virtual ProgressBar* 
CreateProgressBar(std::string_view InSubTask) override - { - ZEN_UNUSED(InSubTask); - return nullptr; - } - - private: - JobContext* m_OptionalContext; - }; - - std::unique_ptr<OperationLogOutput> LogOutput(std::make_unique<JobContextLogOutput>(OptionalContext)); + std::unique_ptr<OperationLogOutput> LogOutput(std::make_unique<remotestore_impl::JobContextLogOutput>(OptionalContext)); size_t OpCount = 0; @@ -1783,31 +2215,36 @@ BuildContainer(CidStore& ChunkStore, } ResolveAttachmentsLatch.CountDown(); - while (!ResolveAttachmentsLatch.Wait(1000)) { - ptrdiff_t Remaining = ResolveAttachmentsLatch.Remaining(); - if (remotestore_impl::IsCancelled(OptionalContext)) + ptrdiff_t AttachmentCountToUseForProgress = ResolveAttachmentsLatch.Remaining(); + while (!ResolveAttachmentsLatch.Wait(1000)) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - while (!ResolveAttachmentsLatch.Wait(1000)) + ptrdiff_t Remaining = ResolveAttachmentsLatch.Remaining(); + if (remotestore_impl::IsCancelled(OptionalContext)) { - Remaining = ResolveAttachmentsLatch.Remaining(); - remotestore_impl::ReportProgress(OptionalContext, - "Resolving attachments"sv, - fmt::format("Aborting, {} attachments remaining...", Remaining), - UploadAttachments.size(), - Remaining); + RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); + remotestore_impl::ReportMessage( + OptionalContext, + fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); + while (!ResolveAttachmentsLatch.Wait(1000)) + { + Remaining = ResolveAttachmentsLatch.Remaining(); + remotestore_impl::ReportProgress(OptionalContext, + "Resolving attachments"sv, + fmt::format("Aborting, {} attachments remaining...", Remaining), + UploadAttachments.size(), + Remaining); + } + 
remotestore_impl::ReportProgress(OptionalContext, "Resolving attachments"sv, "Aborted"sv, UploadAttachments.size(), 0); + return {}; } - remotestore_impl::ReportProgress(OptionalContext, "Resolving attachments"sv, "Aborted"sv, UploadAttachments.size(), 0); - return {}; + AttachmentCountToUseForProgress = Max(Remaining, AttachmentCountToUseForProgress); + remotestore_impl::ReportProgress(OptionalContext, + "Resolving attachments"sv, + fmt::format("{} remaining...", Remaining), + AttachmentCountToUseForProgress, + Remaining); } - remotestore_impl::ReportProgress(OptionalContext, - "Resolving attachments"sv, - fmt::format("{} remaining...", Remaining), - UploadAttachments.size(), - Remaining); } if (UploadAttachments.size() > 0) { @@ -2010,14 +2447,13 @@ BuildContainer(CidStore& ChunkStore, AsyncOnBlock, RemoteResult); ComposedBlocks++; + // Worker will set Blocks[BlockIndex] = Block (including ChunkRawHashes) under shared lock } else { ZEN_INFO("Bulk group {} attachments", ChunkCount); OnBlockChunks(std::move(ChunksInBlock)); - } - { - // We can share the lock as we are not resizing the vector and only touch BlockHash at our own index + // We can share the lock as we are not resizing the vector and only touch our own index RwLock::SharedLockScope _(BlocksLock); Blocks[BlockIndex].ChunkRawHashes = std::move(ChunkRawHashes); } @@ -2195,12 +2631,14 @@ BuildContainer(CidStore& ChunkStore, 0); } - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Built oplog and collected {} attachments from {} ops into {} blocks and in {}", - ChunkAssembleCount, - TotalOpCount, - GeneratedBlockCount, - NiceTimeSpanMs(static_cast<uint64_t>(Timer.GetElapsedTimeMs())))); + remotestore_impl::ReportMessage( + OptionalContext, + fmt::format("Built oplog and collected {} attachments from {} ops into {} blocks and {} loose attachments in {}", + ChunkAssembleCount, + TotalOpCount, + GeneratedBlockCount, + LargeChunkHashes.size(), + 
NiceTimeSpanMs(static_cast<uint64_t>(Timer.GetElapsedTimeMs())))); if (remotestore_impl::IsCancelled(OptionalContext)) { @@ -2752,30 +3190,32 @@ SaveOplog(CidStore& ChunkStore, remotestore_impl::LogRemoteStoreStatsDetails(RemoteStore.GetStats()); - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Saved oplog '{}' {} in {} ({}), Blocks: {} ({}), Attachments: {} ({}) {}", - RemoteStoreInfo.ContainerName, - RemoteResult.GetError() == 0 ? "SUCCESS" : "FAILURE", - NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000.0)), - NiceBytes(Info.OplogSizeBytes), - Info.AttachmentBlocksUploaded.load(), - NiceBytes(Info.AttachmentBlockBytesUploaded.load()), - Info.AttachmentsUploaded.load(), - NiceBytes(Info.AttachmentBytesUploaded.load()), - remotestore_impl::GetStats(RemoteStore.GetStats(), TransferWallTimeMS))); + remotestore_impl::ReportMessage( + OptionalContext, + fmt::format("Saved oplog '{}' {} in {} ({}), Blocks: {} ({}), Attachments: {} ({}) {}", + RemoteStoreInfo.ContainerName, + RemoteResult.GetError() == 0 ? 
"SUCCESS" : "FAILURE", + NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000.0)), + NiceBytes(Info.OplogSizeBytes), + Info.AttachmentBlocksUploaded.load(), + NiceBytes(Info.AttachmentBlockBytesUploaded.load()), + Info.AttachmentsUploaded.load(), + NiceBytes(Info.AttachmentBytesUploaded.load()), + remotestore_impl::GetStats(RemoteStore.GetStats(), /*OptionalCacheStats*/ nullptr, TransferWallTimeMS))); return Result; }; RemoteProjectStore::Result -ParseOplogContainer(const CbObject& ContainerObject, - const std::function<void(std::span<IoHash> RawHashes)>& OnReferencedAttachments, - const std::function<bool(const IoHash& RawHash)>& HasAttachment, - const std::function<void(const IoHash& BlockHash, std::vector<IoHash>&& Chunks)>& OnNeedBlock, - const std::function<void(const IoHash& RawHash)>& OnNeedAttachment, - const std::function<void(const ChunkedInfo&)>& OnChunkedAttachment, - CbObject& OutOplogSection, - JobContext* OptionalContext) +ParseOplogContainer( + const CbObject& ContainerObject, + const std::function<void(std::span<IoHash> RawHashes)>& OnReferencedAttachments, + const std::function<bool(const IoHash& RawHash)>& HasAttachment, + const std::function<void(ThinChunkBlockDescription&& ThinBlockDescription, std::vector<uint32_t>&& NeededChunkIndexes)>& OnNeedBlock, + const std::function<void(const IoHash& RawHash)>& OnNeedAttachment, + const std::function<void(const ChunkedInfo&)>& OnChunkedAttachment, + CbObject& OutOplogSection, + JobContext* OptionalContext) { using namespace std::literals; @@ -2801,22 +3241,43 @@ ParseOplogContainer(const CbObject& ContainerObject, "Section has unexpected data type", "Failed to save oplog container"}; } - std::unordered_set<IoHash, IoHash::Hasher> OpsAttachments; + std::unordered_set<IoHash, IoHash::Hasher> NeededAttachments; { CbArrayView OpsArray = OutOplogSection["ops"sv].AsArrayView(); + + size_t OpCount = OpsArray.Num(); + size_t OpsCompleteCount = 0; + + 
remotestore_impl::ReportMessage(OptionalContext, fmt::format("Scanning {} ops for attachments", OpCount)); + for (CbFieldView OpEntry : OpsArray) { - OpEntry.IterateAttachments([&](CbFieldView FieldView) { OpsAttachments.insert(FieldView.AsAttachment()); }); + OpEntry.IterateAttachments([&](CbFieldView FieldView) { NeededAttachments.insert(FieldView.AsAttachment()); }); if (remotestore_impl::IsCancelled(OptionalContext)) { return RemoteProjectStore::Result{.ErrorCode = gsl::narrow<int>(HttpResponseCode::OK), .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0, .Reason = "Operation cancelled"}; } + OpsCompleteCount++; + if ((OpsCompleteCount & 4095) == 0) + { + remotestore_impl::ReportProgress( + OptionalContext, + "Scanning oplog"sv, + fmt::format("{} attachments found, {} ops remaining...", NeededAttachments.size(), OpCount - OpsCompleteCount), + OpCount, + OpCount - OpsCompleteCount); + } } + remotestore_impl::ReportProgress(OptionalContext, + "Scanning oplog"sv, + fmt::format("{} attachments found", NeededAttachments.size()), + OpCount, + OpCount - OpsCompleteCount); } { - std::vector<IoHash> ReferencedAttachments(OpsAttachments.begin(), OpsAttachments.end()); + std::vector<IoHash> ReferencedAttachments(NeededAttachments.begin(), NeededAttachments.end()); OnReferencedAttachments(ReferencedAttachments); } @@ -2827,24 +3288,41 @@ ParseOplogContainer(const CbObject& ContainerObject, .Reason = "Operation cancelled"}; } - remotestore_impl::ReportMessage(OptionalContext, fmt::format("Oplog references {} attachments", OpsAttachments.size())); + remotestore_impl::ReportMessage(OptionalContext, fmt::format("Oplog references {} attachments", NeededAttachments.size())); CbArrayView ChunkedFilesArray = ContainerObject["chunkedfiles"sv].AsArrayView(); for (CbFieldView ChunkedFileField : ChunkedFilesArray) { CbObjectView ChunkedFileView = ChunkedFileField.AsObjectView(); IoHash RawHash = ChunkedFileView["rawhash"sv].AsHash(); - if (OpsAttachments.contains(RawHash) && 
(!HasAttachment(RawHash))) + if (NeededAttachments.erase(RawHash) == 1) { - ChunkedInfo Chunked = ReadChunkedInfo(ChunkedFileView); + if (!HasAttachment(RawHash)) + { + ChunkedInfo Chunked = ReadChunkedInfo(ChunkedFileView); + + size_t NeededChunkAttachmentCount = 0; - OnReferencedAttachments(Chunked.ChunkHashes); - OpsAttachments.insert(Chunked.ChunkHashes.begin(), Chunked.ChunkHashes.end()); - OnChunkedAttachment(Chunked); - ZEN_INFO("Requesting chunked attachment '{}' ({}) built from {} chunks", - Chunked.RawHash, - NiceBytes(Chunked.RawSize), - Chunked.ChunkHashes.size()); + OnReferencedAttachments(Chunked.ChunkHashes); + for (const IoHash& ChunkHash : Chunked.ChunkHashes) + { + if (!HasAttachment(ChunkHash)) + { + if (NeededAttachments.insert(ChunkHash).second) + { + NeededChunkAttachmentCount++; + } + } + } + OnChunkedAttachment(Chunked); + + remotestore_impl::ReportMessage(OptionalContext, + fmt::format("Requesting chunked attachment '{}' ({}) built from {} chunks, need {} chunks", + Chunked.RawHash, + NiceBytes(Chunked.RawSize), + Chunked.ChunkHashes.size(), + NeededChunkAttachmentCount)); + } } if (remotestore_impl::IsCancelled(OptionalContext)) { @@ -2854,6 +3332,8 @@ ParseOplogContainer(const CbObject& ContainerObject, } } + std::vector<ThinChunkBlockDescription> ThinBlocksDescriptions; + size_t NeedBlockCount = 0; CbArrayView BlocksArray = ContainerObject["blocks"sv].AsArrayView(); for (CbFieldView BlockField : BlocksArray) @@ -2863,45 +3343,38 @@ ParseOplogContainer(const CbObject& ContainerObject, CbArrayView ChunksArray = BlockView["chunks"sv].AsArrayView(); - std::vector<IoHash> NeededChunks; - NeededChunks.reserve(ChunksArray.Num()); - if (BlockHash == IoHash::Zero) + std::vector<IoHash> ChunkHashes; + ChunkHashes.reserve(ChunksArray.Num()); + for (CbFieldView ChunkField : ChunksArray) { - for (CbFieldView ChunkField : ChunksArray) - { - IoHash ChunkHash = ChunkField.AsBinaryAttachment(); - if (OpsAttachments.erase(ChunkHash) == 1) - { - if 
(!HasAttachment(ChunkHash)) - { - NeededChunks.emplace_back(ChunkHash); - } - } - } + ChunkHashes.push_back(ChunkField.AsHash()); } - else + ThinBlocksDescriptions.push_back(ThinChunkBlockDescription{.BlockHash = BlockHash, .ChunkRawHashes = std::move(ChunkHashes)}); + } + + for (ThinChunkBlockDescription& ThinBlockDescription : ThinBlocksDescriptions) + { + std::vector<uint32_t> NeededBlockChunkIndexes; + for (uint32_t ChunkIndex = 0; ChunkIndex < ThinBlockDescription.ChunkRawHashes.size(); ChunkIndex++) { - for (CbFieldView ChunkField : ChunksArray) + const IoHash& ChunkHash = ThinBlockDescription.ChunkRawHashes[ChunkIndex]; + if (NeededAttachments.erase(ChunkHash) == 1) { - const IoHash ChunkHash = ChunkField.AsHash(); - if (OpsAttachments.erase(ChunkHash) == 1) + if (!HasAttachment(ChunkHash)) { - if (!HasAttachment(ChunkHash)) - { - NeededChunks.emplace_back(ChunkHash); - } + NeededBlockChunkIndexes.push_back(ChunkIndex); } } } - - if (!NeededChunks.empty()) + if (!NeededBlockChunkIndexes.empty()) { - OnNeedBlock(BlockHash, std::move(NeededChunks)); - if (BlockHash != IoHash::Zero) + if (ThinBlockDescription.BlockHash != IoHash::Zero) { NeedBlockCount++; } + OnNeedBlock(std::move(ThinBlockDescription), std::move(NeededBlockChunkIndexes)); } + if (remotestore_impl::IsCancelled(OptionalContext)) { return RemoteProjectStore::Result{.ErrorCode = gsl::narrow<int>(HttpResponseCode::OK), @@ -2909,6 +3382,7 @@ ParseOplogContainer(const CbObject& ContainerObject, .Reason = "Operation cancelled"}; } } + remotestore_impl::ReportMessage(OptionalContext, fmt::format("Requesting {} of {} attachment blocks", NeedBlockCount, BlocksArray.Num())); @@ -2918,7 +3392,7 @@ ParseOplogContainer(const CbObject& ContainerObject, { IoHash AttachmentHash = LargeChunksField.AsBinaryAttachment(); - if (OpsAttachments.erase(AttachmentHash) == 1) + if (NeededAttachments.erase(AttachmentHash) == 1) { if (!HasAttachment(AttachmentHash)) { @@ -2941,14 +3415,15 @@ ParseOplogContainer(const 
CbObject& ContainerObject, } RemoteProjectStore::Result -SaveOplogContainer(ProjectStore::Oplog& Oplog, - const CbObject& ContainerObject, - const std::function<void(std::span<IoHash> RawHashes)>& OnReferencedAttachments, - const std::function<bool(const IoHash& RawHash)>& HasAttachment, - const std::function<void(const IoHash& BlockHash, std::vector<IoHash>&& Chunks)>& OnNeedBlock, - const std::function<void(const IoHash& RawHash)>& OnNeedAttachment, - const std::function<void(const ChunkedInfo&)>& OnChunkedAttachment, - JobContext* OptionalContext) +SaveOplogContainer( + ProjectStore::Oplog& Oplog, + const CbObject& ContainerObject, + const std::function<void(std::span<IoHash> RawHashes)>& OnReferencedAttachments, + const std::function<bool(const IoHash& RawHash)>& HasAttachment, + const std::function<void(ThinChunkBlockDescription&& ThinBlockDescription, std::vector<uint32_t>&& NeededChunkIndexes)>& OnNeedBlock, + const std::function<void(const IoHash& RawHash)>& OnNeedAttachment, + const std::function<void(const ChunkedInfo&)>& OnChunkedAttachment, + JobContext* OptionalContext) { using namespace std::literals; @@ -2972,18 +3447,12 @@ SaveOplogContainer(ProjectStore::Oplog& Oplog, } RemoteProjectStore::Result -LoadOplog(CidStore& ChunkStore, - RemoteProjectStore& RemoteStore, - ProjectStore::Oplog& Oplog, - WorkerThreadPool& NetworkWorkerPool, - WorkerThreadPool& WorkerPool, - bool ForceDownload, - bool IgnoreMissingAttachments, - bool CleanOplog, - JobContext* OptionalContext) +LoadOplog(LoadOplogContext&& Context) { using namespace std::literals; + std::unique_ptr<OperationLogOutput> LogOutput(std::make_unique<remotestore_impl::JobContextLogOutput>(Context.OptionalJobContext)); + remotestore_impl::DownloadInfo Info; Stopwatch Timer; @@ -2991,25 +3460,25 @@ LoadOplog(CidStore& ChunkStore, std::unordered_set<IoHash, IoHash::Hasher> Attachments; uint64_t BlockCountToDownload = 0; - RemoteProjectStore::RemoteStoreInfo RemoteStoreInfo = RemoteStore.GetInfo(); - 
remotestore_impl::ReportMessage(OptionalContext, fmt::format("Loading oplog container '{}'", RemoteStoreInfo.ContainerName)); + RemoteProjectStore::RemoteStoreInfo RemoteStoreInfo = Context.RemoteStore.GetInfo(); + remotestore_impl::ReportMessage(Context.OptionalJobContext, fmt::format("Loading oplog container '{}'", RemoteStoreInfo.ContainerName)); uint64_t TransferWallTimeMS = 0; Stopwatch LoadContainerTimer; - RemoteProjectStore::LoadContainerResult LoadContainerResult = RemoteStore.LoadContainer(); + RemoteProjectStore::LoadContainerResult LoadContainerResult = Context.RemoteStore.LoadContainer(); TransferWallTimeMS += LoadContainerTimer.GetElapsedTimeMs(); if (LoadContainerResult.ErrorCode) { remotestore_impl::ReportMessage( - OptionalContext, + Context.OptionalJobContext, fmt::format("Failed to load oplog container: '{}', error code: {}", LoadContainerResult.Reason, LoadContainerResult.ErrorCode)); return RemoteProjectStore::Result{.ErrorCode = LoadContainerResult.ErrorCode, .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0, .Reason = LoadContainerResult.Reason, .Text = LoadContainerResult.Text}; } - remotestore_impl::ReportMessage(OptionalContext, + remotestore_impl::ReportMessage(Context.OptionalJobContext, fmt::format("Loaded container in {} ({})", NiceTimeSpanMs(static_cast<uint64_t>(LoadContainerResult.ElapsedSeconds * 1000)), NiceBytes(LoadContainerResult.ContainerObject.GetSize()))); @@ -3023,22 +3492,27 @@ LoadOplog(CidStore& ChunkStore, Stopwatch LoadAttachmentsTimer; std::atomic_uint64_t DownloadStartMS = (std::uint64_t)-1; - auto HasAttachment = [&Oplog, &ChunkStore, ForceDownload](const IoHash& RawHash) { - if (ForceDownload) + auto HasAttachment = [&Context](const IoHash& RawHash) { + if (Context.ForceDownload) { return false; } - if (ChunkStore.ContainsChunk(RawHash)) + if (Context.ChunkStore.ContainsChunk(RawHash)) { return true; } return false; }; - auto OnNeedBlock = [&RemoteStore, - &ChunkStore, - &NetworkWorkerPool, - &WorkerPool, + 
struct NeededBlockDownload + { + ThinChunkBlockDescription ThinBlockDescription; + std::vector<uint32_t> NeededChunkIndexes; + }; + + std::vector<NeededBlockDownload> NeededBlockDownloads; + + auto OnNeedBlock = [&Context, &AttachmentsDownloadLatch, &AttachmentsWriteLatch, &AttachmentCount, @@ -3047,8 +3521,8 @@ LoadOplog(CidStore& ChunkStore, &Info, &LoadAttachmentsTimer, &DownloadStartMS, - IgnoreMissingAttachments, - OptionalContext](const IoHash& BlockHash, std::vector<IoHash>&& Chunks) { + &NeededBlockDownloads](ThinChunkBlockDescription&& ThinBlockDescription, + std::vector<uint32_t>&& NeededChunkIndexes) { if (RemoteResult.IsError()) { return; @@ -3056,47 +3530,26 @@ LoadOplog(CidStore& ChunkStore, BlockCountToDownload++; AttachmentCount.fetch_add(1); - if (BlockHash == IoHash::Zero) - { - DownloadAndSaveBlockChunks(ChunkStore, - RemoteStore, - IgnoreMissingAttachments, - OptionalContext, - NetworkWorkerPool, - WorkerPool, + if (ThinBlockDescription.BlockHash == IoHash::Zero) + { + DownloadAndSaveBlockChunks(Context, AttachmentsDownloadLatch, AttachmentsWriteLatch, RemoteResult, Info, LoadAttachmentsTimer, DownloadStartMS, - Chunks); + std::move(ThinBlockDescription), + std::move(NeededChunkIndexes)); } else { - DownloadAndSaveBlock(ChunkStore, - RemoteStore, - IgnoreMissingAttachments, - OptionalContext, - NetworkWorkerPool, - WorkerPool, - AttachmentsDownloadLatch, - AttachmentsWriteLatch, - RemoteResult, - Info, - LoadAttachmentsTimer, - DownloadStartMS, - BlockHash, - Chunks, - 3); + NeededBlockDownloads.push_back(NeededBlockDownload{.ThinBlockDescription = std::move(ThinBlockDescription), + .NeededChunkIndexes = std::move(NeededChunkIndexes)}); } }; - auto OnNeedAttachment = [&RemoteStore, - &Oplog, - &ChunkStore, - &NetworkWorkerPool, - &WorkerPool, + auto OnNeedAttachment = [&Context, &AttachmentsDownloadLatch, &AttachmentsWriteLatch, &RemoteResult, @@ -3104,9 +3557,7 @@ LoadOplog(CidStore& ChunkStore, &AttachmentCount, &LoadAttachmentsTimer, 
&DownloadStartMS, - &Info, - IgnoreMissingAttachments, - OptionalContext](const IoHash& RawHash) { + &Info](const IoHash& RawHash) { if (!Attachments.insert(RawHash).second) { return; @@ -3116,12 +3567,7 @@ LoadOplog(CidStore& ChunkStore, return; } AttachmentCount.fetch_add(1); - DownloadAndSaveAttachment(ChunkStore, - RemoteStore, - IgnoreMissingAttachments, - OptionalContext, - NetworkWorkerPool, - WorkerPool, + DownloadAndSaveAttachment(Context, AttachmentsDownloadLatch, AttachmentsWriteLatch, RemoteResult, @@ -3132,18 +3578,13 @@ LoadOplog(CidStore& ChunkStore, }; std::vector<ChunkedInfo> FilesToDechunk; - auto OnChunkedAttachment = [&Oplog, &ChunkStore, &FilesToDechunk, ForceDownload](const ChunkedInfo& Chunked) { - if (ForceDownload || !ChunkStore.ContainsChunk(Chunked.RawHash)) - { - FilesToDechunk.push_back(Chunked); - } - }; + auto OnChunkedAttachment = [&FilesToDechunk](const ChunkedInfo& Chunked) { FilesToDechunk.push_back(Chunked); }; - auto OnReferencedAttachments = [&Oplog](std::span<IoHash> RawHashes) { Oplog.CaptureAddedAttachments(RawHashes); }; + auto OnReferencedAttachments = [&Context](std::span<IoHash> RawHashes) { Context.Oplog.CaptureAddedAttachments(RawHashes); }; // Make sure we retain any attachments we download before writing the oplog - Oplog.EnableUpdateCapture(); - auto _ = MakeGuard([&Oplog]() { Oplog.DisableUpdateCapture(); }); + Context.Oplog.EnableUpdateCapture(); + auto _ = MakeGuard([&Context]() { Context.Oplog.DisableUpdateCapture(); }); CbObject OplogSection; RemoteProjectStore::Result Result = ParseOplogContainer(LoadContainerResult.ContainerObject, @@ -3153,40 +3594,268 @@ LoadOplog(CidStore& ChunkStore, OnNeedAttachment, OnChunkedAttachment, OplogSection, - OptionalContext); + Context.OptionalJobContext); if (Result.ErrorCode != 0) { RemoteResult.SetError(Result.ErrorCode, Result.Reason, Result.Text); } - remotestore_impl::ReportMessage(OptionalContext, + remotestore_impl::ReportMessage(Context.OptionalJobContext, 
fmt::format("Parsed oplog in {}, found {} attachments, {} blocks and {} chunked files to download", NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000.0)), Attachments.size(), BlockCountToDownload, FilesToDechunk.size())); - AttachmentsDownloadLatch.CountDown(); - while (!AttachmentsDownloadLatch.Wait(1000)) + std::vector<IoHash> BlockHashes; + std::vector<IoHash> AllNeededChunkHashes; + BlockHashes.reserve(NeededBlockDownloads.size()); + for (const NeededBlockDownload& BlockDownload : NeededBlockDownloads) { - ptrdiff_t Remaining = AttachmentsDownloadLatch.Remaining(); - if (remotestore_impl::IsCancelled(OptionalContext)) + BlockHashes.push_back(BlockDownload.ThinBlockDescription.BlockHash); + for (uint32_t ChunkIndex : BlockDownload.NeededChunkIndexes) { - if (!RemoteResult.IsError()) + AllNeededChunkHashes.push_back(BlockDownload.ThinBlockDescription.ChunkRawHashes[ChunkIndex]); + } + } + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> AllNeededPartialChunkHashesLookup = BuildHashLookup(AllNeededChunkHashes); + std::vector<std::atomic<bool>> ChunkDownloadedFlags(AllNeededChunkHashes.size()); + std::vector<bool> DownloadedViaLegacyChunkFlag(AllNeededChunkHashes.size(), false); + ChunkBlockAnalyser::BlockResult PartialBlocksResult; + + remotestore_impl::ReportMessage(Context.OptionalJobContext, fmt::format("Fetching descriptions for {} blocks", BlockHashes.size())); + + RemoteProjectStore::GetBlockDescriptionsResult BlockDescriptions = + Context.RemoteStore.GetBlockDescriptions(BlockHashes, Context.OptionalCache, Context.CacheBuildId); + + remotestore_impl::ReportMessage(Context.OptionalJobContext, + fmt::format("GetBlockDescriptions took {}. 
Found {} blocks", + NiceTimeSpanMs(uint64_t(BlockDescriptions.ElapsedSeconds * 1000)), + BlockDescriptions.Blocks.size())); + + std::vector<IoHash> BlocksWithDescription; + BlocksWithDescription.reserve(BlockDescriptions.Blocks.size()); + for (const ChunkBlockDescription& BlockDescription : BlockDescriptions.Blocks) + { + BlocksWithDescription.push_back(BlockDescription.BlockHash); + } + { + auto WantIt = NeededBlockDownloads.begin(); + auto FindIt = BlockDescriptions.Blocks.begin(); + while (WantIt != NeededBlockDownloads.end()) + { + if (FindIt == BlockDescriptions.Blocks.end()) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); + // Fall back to full download as we can't get enough information about the block + DownloadAndSaveBlock(Context, + AttachmentsDownloadLatch, + AttachmentsWriteLatch, + RemoteResult, + Info, + LoadAttachmentsTimer, + DownloadStartMS, + WantIt->ThinBlockDescription.BlockHash, + AllNeededPartialChunkHashesLookup, + ChunkDownloadedFlags, + 3); + for (uint32_t BlockChunkIndex : WantIt->NeededChunkIndexes) + { + const IoHash& ChunkHash = WantIt->ThinBlockDescription.ChunkRawHashes[BlockChunkIndex]; + auto It = AllNeededPartialChunkHashesLookup.find(ChunkHash); + ZEN_ASSERT(It != AllNeededPartialChunkHashesLookup.end()); + uint32_t ChunkIndex = It->second; + DownloadedViaLegacyChunkFlag[ChunkIndex] = true; + } + WantIt++; + } + else if (WantIt->ThinBlockDescription.BlockHash == FindIt->BlockHash) + { + // Found + FindIt++; + WantIt++; + } + else + { + // Not a requested block? 
+ ZEN_ASSERT(false); } } - uint64_t PartialTransferWallTimeMS = TransferWallTimeMS; - if (DownloadStartMS != (uint64_t)-1) + } + if (!AllNeededChunkHashes.empty()) + { + std::vector<ChunkBlockAnalyser::EPartialBlockDownloadMode> PartialBlockDownloadModes; + std::vector<bool> BlockExistsInCache(BlocksWithDescription.size(), false); + + if (Context.PartialBlockRequestMode == EPartialBlockRequestMode::Off) { - PartialTransferWallTimeMS += LoadAttachmentsTimer.GetElapsedTimeMs() - DownloadStartMS.load(); + PartialBlockDownloadModes.resize(BlocksWithDescription.size(), ChunkBlockAnalyser::EPartialBlockDownloadMode::Off); + } + else + { + if (Context.OptionalCache) + { + std::vector<BuildStorageCache::BlobExistsResult> CacheExistsResult = + Context.OptionalCache->BlobsExists(Context.CacheBuildId, BlocksWithDescription); + if (CacheExistsResult.size() == BlocksWithDescription.size()) + { + for (size_t BlobIndex = 0; BlobIndex < CacheExistsResult.size(); BlobIndex++) + { + BlockExistsInCache[BlobIndex] = CacheExistsResult[BlobIndex].HasBody; + } + } + uint64_t FoundBlocks = + std::accumulate(BlockExistsInCache.begin(), + BlockExistsInCache.end(), + uint64_t(0u), + [](uint64_t Current, bool Exists) -> uint64_t { return Current + (Exists ? 1 : 0); }); + if (FoundBlocks > 0) + { + remotestore_impl::ReportMessage( + Context.OptionalJobContext, + fmt::format("Found {} out of {} blocks in cache", FoundBlocks, BlockExistsInCache.size())); + } + } + + ChunkBlockAnalyser::EPartialBlockDownloadMode CloudPartialDownloadMode = ChunkBlockAnalyser::EPartialBlockDownloadMode::Off; + ChunkBlockAnalyser::EPartialBlockDownloadMode CachePartialDownloadMode = ChunkBlockAnalyser::EPartialBlockDownloadMode::Off; + + switch (Context.PartialBlockRequestMode) + { + case EPartialBlockRequestMode::Off: + break; + case EPartialBlockRequestMode::ZenCacheOnly: + CachePartialDownloadMode = Context.CacheMaxRangeCountPerRequest > 1 + ? 
ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed + : ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRange; + CloudPartialDownloadMode = ChunkBlockAnalyser::EPartialBlockDownloadMode::Off; + break; + case EPartialBlockRequestMode::Mixed: + CachePartialDownloadMode = Context.CacheMaxRangeCountPerRequest > 1 + ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed + : ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRange; + CloudPartialDownloadMode = ChunkBlockAnalyser::EPartialBlockDownloadMode::SingleRange; + break; + case EPartialBlockRequestMode::All: + CachePartialDownloadMode = Context.CacheMaxRangeCountPerRequest > 1 + ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed + : ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRange; + CloudPartialDownloadMode = Context.StoreMaxRangeCountPerRequest > 1 + ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRange + : ChunkBlockAnalyser::EPartialBlockDownloadMode::SingleRange; + break; + } + + PartialBlockDownloadModes.reserve(BlocksWithDescription.size()); + for (uint32_t BlockIndex = 0; BlockIndex < BlocksWithDescription.size(); BlockIndex++) + { + const bool BlockExistInCache = BlockExistsInCache[BlockIndex]; + PartialBlockDownloadModes.push_back(BlockExistInCache ? 
CachePartialDownloadMode : CloudPartialDownloadMode); + } + } + + ZEN_ASSERT(PartialBlockDownloadModes.size() == BlocksWithDescription.size()); + + ChunkBlockAnalyser PartialAnalyser( + *LogOutput, + BlockDescriptions.Blocks, + ChunkBlockAnalyser::Options{.IsQuiet = false, + .IsVerbose = false, + .HostLatencySec = Context.StoreLatencySec, + .HostHighSpeedLatencySec = Context.CacheLatencySec, + .HostMaxRangeCountPerRequest = Context.StoreMaxRangeCountPerRequest, + .HostHighSpeedMaxRangeCountPerRequest = Context.CacheMaxRangeCountPerRequest}); + + std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = + PartialAnalyser.GetNeeded(AllNeededPartialChunkHashesLookup, + [&](uint32_t ChunkIndex) { return !DownloadedViaLegacyChunkFlag[ChunkIndex]; }); + + PartialBlocksResult = PartialAnalyser.CalculatePartialBlockDownloads(NeededBlocks, PartialBlockDownloadModes); + + for (uint32_t FullBlockIndex : PartialBlocksResult.FullBlockIndexes) + { + DownloadAndSaveBlock(Context, + AttachmentsDownloadLatch, + AttachmentsWriteLatch, + RemoteResult, + Info, + LoadAttachmentsTimer, + DownloadStartMS, + BlockDescriptions.Blocks[FullBlockIndex].BlockHash, + AllNeededPartialChunkHashesLookup, + ChunkDownloadedFlags, + 3); + } + + for (size_t BlockRangeIndex = 0; BlockRangeIndex < PartialBlocksResult.BlockRanges.size();) + { + size_t RangeCount = 1; + size_t RangesLeft = PartialBlocksResult.BlockRanges.size() - BlockRangeIndex; + const ChunkBlockAnalyser::BlockRangeDescriptor& CurrentBlockRange = PartialBlocksResult.BlockRanges[BlockRangeIndex]; + while (RangeCount < RangesLeft && + CurrentBlockRange.BlockIndex == PartialBlocksResult.BlockRanges[BlockRangeIndex + RangeCount].BlockIndex) + { + RangeCount++; + } + + DownloadAndSavePartialBlock(Context, + AttachmentsDownloadLatch, + AttachmentsWriteLatch, + RemoteResult, + Info, + LoadAttachmentsTimer, + DownloadStartMS, + BlockDescriptions.Blocks[CurrentBlockRange.BlockIndex], + BlockExistsInCache[CurrentBlockRange.BlockIndex], + 
PartialBlocksResult.BlockRanges, + BlockRangeIndex, + RangeCount, + AllNeededPartialChunkHashesLookup, + ChunkDownloadedFlags, + /* RetriesLeft*/ 3); + + BlockRangeIndex += RangeCount; + } + } + + AttachmentsDownloadLatch.CountDown(); + { + ptrdiff_t AttachmentCountToUseForProgress = AttachmentsDownloadLatch.Remaining(); + while (!AttachmentsDownloadLatch.Wait(1000)) + { + ptrdiff_t Remaining = AttachmentsDownloadLatch.Remaining(); + if (remotestore_impl::IsCancelled(Context.OptionalJobContext)) + { + if (!RemoteResult.IsError()) + { + RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); + } + } + uint64_t PartialTransferWallTimeMS = TransferWallTimeMS; + if (DownloadStartMS != (uint64_t)-1) + { + PartialTransferWallTimeMS += LoadAttachmentsTimer.GetElapsedTimeMs() - DownloadStartMS.load(); + } + + uint64_t AttachmentsDownloaded = + Info.AttachmentBlocksDownloaded.load() + Info.AttachmentBlocksRangesDownloaded.load() + Info.AttachmentsDownloaded.load(); + uint64_t AttachmentBytesDownloaded = Info.AttachmentBlockBytesDownloaded.load() + + Info.AttachmentBlockRangeBytesDownloaded.load() + Info.AttachmentBytesDownloaded.load(); + + AttachmentCountToUseForProgress = Max(Remaining, AttachmentCountToUseForProgress); + remotestore_impl::ReportProgress( + Context.OptionalJobContext, + "Loading attachments"sv, + fmt::format( + "{} ({}) downloaded, {} ({}) stored, {} remaining. {}", + AttachmentsDownloaded, + NiceBytes(AttachmentBytesDownloaded), + Info.AttachmentsStored.load(), + NiceBytes(Info.AttachmentBytesStored.load()), + Remaining, + remotestore_impl::GetStats(Context.RemoteStore.GetStats(), Context.OptionalCacheStats, PartialTransferWallTimeMS)), + AttachmentCountToUseForProgress, + Remaining); } - remotestore_impl::ReportProgress( - OptionalContext, - "Loading attachments"sv, - fmt::format("{} remaining. 
{}", Remaining, remotestore_impl::GetStats(RemoteStore.GetStats(), PartialTransferWallTimeMS)), - AttachmentCount.load(), - Remaining); } if (DownloadStartMS != (uint64_t)-1) { @@ -3195,57 +3864,58 @@ LoadOplog(CidStore& ChunkStore, if (AttachmentCount.load() > 0) { - remotestore_impl::ReportProgress(OptionalContext, - "Loading attachments"sv, - fmt::format("{}", remotestore_impl::GetStats(RemoteStore.GetStats(), TransferWallTimeMS)), - AttachmentCount.load(), - 0); + remotestore_impl::ReportProgress( + Context.OptionalJobContext, + "Loading attachments"sv, + fmt::format("{}", remotestore_impl::GetStats(Context.RemoteStore.GetStats(), Context.OptionalCacheStats, TransferWallTimeMS)), + AttachmentCount.load(), + 0); } AttachmentsWriteLatch.CountDown(); - while (!AttachmentsWriteLatch.Wait(1000)) { - ptrdiff_t Remaining = AttachmentsWriteLatch.Remaining(); - if (remotestore_impl::IsCancelled(OptionalContext)) + ptrdiff_t AttachmentCountToUseForProgress = AttachmentsWriteLatch.Remaining(); + while (!AttachmentsWriteLatch.Wait(1000)) { - if (!RemoteResult.IsError()) + ptrdiff_t Remaining = AttachmentsWriteLatch.Remaining(); + if (remotestore_impl::IsCancelled(Context.OptionalJobContext)) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); + if (!RemoteResult.IsError()) + { + RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); + } } + AttachmentCountToUseForProgress = Max(Remaining, AttachmentCountToUseForProgress); + remotestore_impl::ReportProgress(Context.OptionalJobContext, + "Writing attachments"sv, + fmt::format("{} ({}), {} remaining.", + Info.AttachmentsStored.load(), + NiceBytes(Info.AttachmentBytesStored.load()), + Remaining), + AttachmentCountToUseForProgress, + Remaining); } - remotestore_impl::ReportProgress(OptionalContext, - "Writing attachments"sv, - fmt::format("{} remaining.", Remaining), - AttachmentCount.load(), - Remaining); } if (AttachmentCount.load() > 0) { - 
remotestore_impl::ReportProgress(OptionalContext, "Writing attachments", ""sv, AttachmentCount.load(), 0); + remotestore_impl::ReportProgress(Context.OptionalJobContext, "Writing attachments", ""sv, AttachmentCount.load(), 0); } if (Result.ErrorCode == 0) { if (!FilesToDechunk.empty()) { - remotestore_impl::ReportMessage(OptionalContext, fmt::format("Dechunking {} attachments", FilesToDechunk.size())); + remotestore_impl::ReportMessage(Context.OptionalJobContext, fmt::format("Dechunking {} attachments", FilesToDechunk.size())); Latch DechunkLatch(1); - std::filesystem::path TempFilePath = Oplog.TempPath(); + std::filesystem::path TempFilePath = Context.Oplog.TempPath(); for (const ChunkedInfo& Chunked : FilesToDechunk) { std::filesystem::path TempFileName = TempFilePath / Chunked.RawHash.ToHexString(); DechunkLatch.AddCount(1); - WorkerPool.ScheduleWork( - [&ChunkStore, - &DechunkLatch, - TempFileName, - &Chunked, - &RemoteResult, - IgnoreMissingAttachments, - &Info, - OptionalContext]() { + Context.WorkerPool.ScheduleWork( + [&Context, &DechunkLatch, TempFileName, &Chunked, &RemoteResult, &Info]() { ZEN_TRACE_CPU("DechunkAttachment"); auto _ = MakeGuard([&DechunkLatch, &TempFileName] { @@ -3279,16 +3949,16 @@ LoadOplog(CidStore& ChunkStore, for (std::uint32_t SequenceIndex : Chunked.ChunkSequence) { const IoHash& ChunkHash = Chunked.ChunkHashes[SequenceIndex]; - IoBuffer Chunk = ChunkStore.FindChunkByCid(ChunkHash); + IoBuffer Chunk = Context.ChunkStore.FindChunkByCid(ChunkHash); if (!Chunk) { remotestore_impl::ReportMessage( - OptionalContext, + Context.OptionalJobContext, fmt::format("Missing chunk {} for chunked attachment {}", ChunkHash, Chunked.RawHash)); // We only add 1 as the resulting missing count will be 1 for the dechunked file Info.MissingAttachmentCount.fetch_add(1); - if (!IgnoreMissingAttachments) + if (!Context.IgnoreMissingAttachments) { RemoteResult.SetError( gsl::narrow<int>(HttpResponseCode::NotFound), @@ -3306,7 +3976,7 @@ LoadOplog(CidStore& 
ChunkStore, if (RawHash != ChunkHash) { remotestore_impl::ReportMessage( - OptionalContext, + Context.OptionalJobContext, fmt::format("Mismatching raw hash {} for chunk {} for chunked attachment {}", RawHash, ChunkHash, @@ -3314,7 +3984,7 @@ LoadOplog(CidStore& ChunkStore, // We only add 1 as the resulting missing count will be 1 for the dechunked file Info.MissingAttachmentCount.fetch_add(1); - if (!IgnoreMissingAttachments) + if (!Context.IgnoreMissingAttachments) { RemoteResult.SetError( gsl::narrow<int>(HttpResponseCode::NotFound), @@ -3351,14 +4021,14 @@ LoadOplog(CidStore& ChunkStore, })) { remotestore_impl::ReportMessage( - OptionalContext, + Context.OptionalJobContext, fmt::format("Failed to decompress chunk {} for chunked attachment {}", ChunkHash, Chunked.RawHash)); // We only add 1 as the resulting missing count will be 1 for the dechunked file Info.MissingAttachmentCount.fetch_add(1); - if (!IgnoreMissingAttachments) + if (!Context.IgnoreMissingAttachments) { RemoteResult.SetError( gsl::narrow<int>(HttpResponseCode::NotFound), @@ -3380,11 +4050,12 @@ LoadOplog(CidStore& ChunkStore, TmpFile.Close(); TmpBuffer = IoBufferBuilder::MakeFromTemporaryFile(TempFileName); } + uint64_t TmpBufferSize = TmpBuffer.GetSize(); CidStore::InsertResult InsertResult = - ChunkStore.AddChunk(TmpBuffer, Chunked.RawHash, CidStore::InsertMode::kMayBeMovedInPlace); + Context.ChunkStore.AddChunk(TmpBuffer, Chunked.RawHash, CidStore::InsertMode::kMayBeMovedInPlace); if (InsertResult.New) { - Info.AttachmentBytesStored.fetch_add(TmpBuffer.GetSize()); + Info.AttachmentBytesStored.fetch_add(TmpBufferSize); Info.AttachmentsStored.fetch_add(1); } @@ -3407,54 +4078,58 @@ LoadOplog(CidStore& ChunkStore, while (!DechunkLatch.Wait(1000)) { ptrdiff_t Remaining = DechunkLatch.Remaining(); - if (remotestore_impl::IsCancelled(OptionalContext)) + if (remotestore_impl::IsCancelled(Context.OptionalJobContext)) { if (!RemoteResult.IsError()) { 
RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); remotestore_impl::ReportMessage( - OptionalContext, + Context.OptionalJobContext, fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); } } - remotestore_impl::ReportProgress(OptionalContext, + remotestore_impl::ReportProgress(Context.OptionalJobContext, "Dechunking attachments"sv, fmt::format("{} remaining...", Remaining), FilesToDechunk.size(), Remaining); } - remotestore_impl::ReportProgress(OptionalContext, "Dechunking attachments"sv, ""sv, FilesToDechunk.size(), 0); + remotestore_impl::ReportProgress(Context.OptionalJobContext, "Dechunking attachments"sv, ""sv, FilesToDechunk.size(), 0); } Result = RemoteResult.ConvertResult(); } if (Result.ErrorCode == 0) { - if (CleanOplog) + if (Context.CleanOplog) { - RemoteStore.Flush(); - if (!Oplog.Reset()) + if (Context.OptionalCache) + { + Context.OptionalCache->Flush(100, [](intptr_t) { return /*DontWaitForPendingOperation*/ false; }); + } + if (!Context.Oplog.Reset()) { Result = RemoteProjectStore::Result{.ErrorCode = gsl::narrow<int>(HttpResponseCode::InternalServerError), .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0, - .Reason = fmt::format("Failed to clean existing oplog '{}'", Oplog.OplogId())}; - remotestore_impl::ReportMessage(OptionalContext, fmt::format("Aborting ({}): {}", Result.ErrorCode, Result.Reason)); + .Reason = fmt::format("Failed to clean existing oplog '{}'", Context.Oplog.OplogId())}; + remotestore_impl::ReportMessage(Context.OptionalJobContext, + fmt::format("Aborting ({}): {}", Result.ErrorCode, Result.Reason)); } } if (Result.ErrorCode == 0) { - remotestore_impl::WriteOplogSection(Oplog, OplogSection, OptionalContext); + remotestore_impl::WriteOplogSection(Context.Oplog, OplogSection, Context.OptionalJobContext); } } Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; - remotestore_impl::LogRemoteStoreStatsDetails(RemoteStore.GetStats()); + 
remotestore_impl::LogRemoteStoreStatsDetails(Context.RemoteStore.GetStats()); { std::string DownloadDetails; RemoteProjectStore::ExtendedStats ExtendedStats; - if (RemoteStore.GetExtendedStats(ExtendedStats)) + if (Context.RemoteStore.GetExtendedStats(ExtendedStats)) { if (!ExtendedStats.m_ReceivedBytesPerSource.empty()) { @@ -3473,26 +4148,37 @@ LoadOplog(CidStore& ChunkStore, Total += It.second; } - remotestore_impl::ReportMessage(OptionalContext, fmt::format("Downloaded {} ({})", NiceBytes(Total), SB.ToView())); + remotestore_impl::ReportMessage(Context.OptionalJobContext, + fmt::format("Downloaded {} ({})", NiceBytes(Total), SB.ToView())); } } } + uint64_t TotalDownloads = + 1 + Info.AttachmentBlocksDownloaded.load() + Info.AttachmentBlocksRangesDownloaded.load() + Info.AttachmentsDownloaded.load(); + uint64_t TotalBytesDownloaded = Info.OplogSizeBytes + Info.AttachmentBlockBytesDownloaded.load() + + Info.AttachmentBlockRangeBytesDownloaded.load() + Info.AttachmentBytesDownloaded.load(); + remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Loaded oplog '{}' {} in {} ({}), Blocks: {} ({}), Attachments: {} ({}), Stored: {} ({}), Missing: {} {}", + Context.OptionalJobContext, + fmt::format("Loaded oplog '{}' {} in {} ({}), Blocks: {} ({}), BlockRanges: {} ({}), Attachments: {} " + "({}), Total: {} ({}), Stored: {} ({}), Missing: {} {}", RemoteStoreInfo.ContainerName, Result.ErrorCode == 0 ? 
"SUCCESS" : "FAILURE", NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000.0)), NiceBytes(Info.OplogSizeBytes), Info.AttachmentBlocksDownloaded.load(), NiceBytes(Info.AttachmentBlockBytesDownloaded.load()), + Info.AttachmentBlocksRangesDownloaded.load(), + NiceBytes(Info.AttachmentBlockRangeBytesDownloaded.load()), Info.AttachmentsDownloaded.load(), NiceBytes(Info.AttachmentBytesDownloaded.load()), + TotalDownloads, + NiceBytes(TotalBytesDownloaded), Info.AttachmentsStored.load(), NiceBytes(Info.AttachmentBytesStored.load()), Info.MissingAttachmentCount.load(), - remotestore_impl::GetStats(RemoteStore.GetStats(), TransferWallTimeMS))); + remotestore_impl::GetStats(Context.RemoteStore.GetStats(), Context.OptionalCacheStats, TransferWallTimeMS))); return Result; } @@ -3537,7 +4223,7 @@ RemoteProjectStore::~RemoteProjectStore() #if ZEN_WITH_TESTS -namespace testutils { +namespace projectstore_testutils { using namespace std::literals; static std::string OidAsString(const Oid& Id) @@ -3589,7 +4275,29 @@ namespace testutils { return Result; } -} // namespace testutils + class TestJobContext : public JobContext + { + public: + explicit TestJobContext(int& OpIndex) : m_OpIndex(OpIndex) {} + virtual bool IsCancelled() const { return false; } + virtual void ReportMessage(std::string_view Message) { ZEN_INFO("Job {}: {}", m_OpIndex, Message); } + virtual void ReportProgress(std::string_view CurrentOp, std::string_view Details, ptrdiff_t TotalCount, ptrdiff_t RemainingCount) + { + ZEN_INFO("Job {}: Op '{}'{} {}/{}", + m_OpIndex, + CurrentOp, + Details.empty() ? 
"" : fmt::format(" {}", Details), + TotalCount - RemainingCount, + TotalCount); + } + + private: + int& m_OpIndex; + }; + +} // namespace projectstore_testutils + +TEST_SUITE_BEGIN("remotestore.projectstore"); struct ExportForceDisableBlocksTrue_ForceTempBlocksFalse { @@ -3616,7 +4324,7 @@ TEST_CASE_TEMPLATE("project.store.export", ExportForceDisableBlocksFalse_ForceTempBlocksTrue) { using namespace std::literals; - using namespace testutils; + using namespace projectstore_testutils; ScopedTemporaryDirectory TempDir; ScopedTemporaryDirectory ExportDir; @@ -3684,56 +4392,712 @@ TEST_CASE_TEMPLATE("project.store.export", false, nullptr); - CHECK(ExportResult.ErrorCode == 0); + REQUIRE(ExportResult.ErrorCode == 0); Ref<ProjectStore::Oplog> OplogImport = Project->NewOplog("oplog2", {}); CHECK(OplogImport); - RemoteProjectStore::Result ImportResult = LoadOplog(CidStore, - *RemoteStore, - *OplogImport, - NetworkPool, - WorkerPool, - /*Force*/ false, - /*IgnoreMissingAttachments*/ false, - /*CleanOplog*/ false, - nullptr); + int OpJobIndex = 0; + TestJobContext OpJobContext(OpJobIndex); + + RemoteProjectStore::Result ImportResult = LoadOplog(LoadOplogContext{.ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .OptionalCache = nullptr, + .CacheBuildId = Oid::Zero, + .Oplog = *OplogImport, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = false, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed, + .OptionalJobContext = &OpJobContext}); CHECK(ImportResult.ErrorCode == 0); - - RemoteProjectStore::Result ImportForceResult = LoadOplog(CidStore, - *RemoteStore, - *OplogImport, - NetworkPool, - WorkerPool, - /*Force*/ true, - /*IgnoreMissingAttachments*/ false, - /*CleanOplog*/ false, - nullptr); + OpJobIndex++; + + RemoteProjectStore::Result ImportForceResult = LoadOplog(LoadOplogContext{.ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .OptionalCache = nullptr, + 
.CacheBuildId = Oid::Zero, + .Oplog = *OplogImport, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = true, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed, + .OptionalJobContext = &OpJobContext}); CHECK(ImportForceResult.ErrorCode == 0); - - RemoteProjectStore::Result ImportCleanResult = LoadOplog(CidStore, - *RemoteStore, - *OplogImport, - NetworkPool, - WorkerPool, - /*Force*/ false, - /*IgnoreMissingAttachments*/ false, - /*CleanOplog*/ true, - nullptr); + OpJobIndex++; + + RemoteProjectStore::Result ImportCleanResult = LoadOplog(LoadOplogContext{.ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .OptionalCache = nullptr, + .CacheBuildId = Oid::Zero, + .Oplog = *OplogImport, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = false, + .IgnoreMissingAttachments = false, + .CleanOplog = true, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed, + .OptionalJobContext = &OpJobContext}); CHECK(ImportCleanResult.ErrorCode == 0); - - RemoteProjectStore::Result ImportForceCleanResult = LoadOplog(CidStore, - *RemoteStore, - *OplogImport, - NetworkPool, - WorkerPool, - /*Force*/ true, - /*IgnoreMissingAttachments*/ false, - /*CleanOplog*/ true, - nullptr); + OpJobIndex++; + + RemoteProjectStore::Result ImportForceCleanResult = + LoadOplog(LoadOplogContext{.ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .OptionalCache = nullptr, + .CacheBuildId = Oid::Zero, + .Oplog = *OplogImport, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = true, + .IgnoreMissingAttachments = false, + .CleanOplog = true, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed, + .OptionalJobContext = &OpJobContext}); CHECK(ImportForceCleanResult.ErrorCode == 0); + OpJobIndex++; } +// Common oplog setup used by the two tests below. 
+// Returns a FileRemoteStore backed by ExportDir that has been populated with a SaveOplog call. +// Keeps the test data identical to project.store.export so the two test suites exercise the same blocks/attachments. +static RemoteProjectStore::Result +SetupExportStore(CidStore& CidStore, + ProjectStore::Project& Project, + WorkerThreadPool& NetworkPool, + WorkerThreadPool& WorkerPool, + const std::filesystem::path& ExportDir, + std::shared_ptr<RemoteProjectStore>& OutRemoteStore) +{ + using namespace projectstore_testutils; + using namespace std::literals; + + Ref<ProjectStore::Oplog> Oplog = Project.NewOplog("oplog_export", {}); + if (!Oplog) + { + return RemoteProjectStore::Result{.ErrorCode = -1}; + } + + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), {})); + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{77}))); + Oplog->AppendNewOplogEntry( + CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{7123, 583, 690, 99}))); + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{55, 122}))); + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage( + Oid::NewOid(), + CreateAttachments(std::initializer_list<size_t>{256u * 1024u, 92u * 1024u}, OodleCompressionLevel::None))); + + FileRemoteStoreOptions Options = {RemoteStoreOptions{.MaxBlockSize = 64u * 1024, + .MaxChunksPerBlock = 1000, + .MaxChunkEmbedSize = 32 * 1024u, + .ChunkFileSizeLimit = 64u * 1024u}, + /*.FolderPath =*/ExportDir, + /*.Name =*/std::string("oplog_export"), + /*.OptionalBaseName =*/std::string(), + /*.ForceDisableBlocks =*/false, + /*.ForceEnableTempBlocks =*/false}; + + OutRemoteStore = CreateFileRemoteStore(Log(), Options); + return SaveOplog(CidStore, + *OutRemoteStore, + Project, + *Oplog, + NetworkPool, + WorkerPool, + Options.MaxBlockSize, + Options.MaxChunksPerBlock, + Options.MaxChunkEmbedSize, + 
Options.ChunkFileSizeLimit, + /*EmbedLooseFiles*/ true, + /*ForceUpload*/ false, + /*IgnoreMissingAttachments*/ false, + /*OptionalContext*/ nullptr); +} + +// Creates an export store with a single oplog entry that packs six 512 KB chunks into one +// ~3 MB block (MaxBlockSize = 8 MB). The resulting block slack (~1.5 MB) far exceeds the +// 512 KB threshold that ChunkBlockAnalyser requires before it will consider partial-block +// downloads instead of full-block downloads. +// +// This function is self-contained: it creates its own GcManager, CidStore, ProjectStore and +// Project internally so that each call is independent of any outer test context. After +// SaveOplog returns, all persistent data lives on disk inside ExportDir and the caller can +// freely query OutRemoteStore without holding any references to the internal context. +static RemoteProjectStore::Result +SetupPartialBlockExportStore(WorkerThreadPool& NetworkPool, + WorkerThreadPool& WorkerPool, + const std::filesystem::path& ExportDir, + std::shared_ptr<RemoteProjectStore>& OutRemoteStore) +{ + using namespace projectstore_testutils; + using namespace std::literals; + + // Self-contained CAS and project store. Subdirectories of ExportDir keep everything + // together without relying on the outer TEST_CASE's ExportCidStore / ExportProject. 
+ GcManager LocalGc; + CidStore LocalCidStore(LocalGc); + CidStoreConfiguration LocalCidConfig = {.RootDirectory = ExportDir / "cas", .TinyValueThreshold = 1024, .HugeValueThreshold = 4096}; + LocalCidStore.Initialize(LocalCidConfig); + + std::filesystem::path LocalProjectBasePath = ExportDir / "proj"; + ProjectStore LocalProjectStore(LocalCidStore, LocalProjectBasePath, LocalGc, ProjectStore::Configuration{}); + Ref<ProjectStore::Project> LocalProject(LocalProjectStore.NewProject(LocalProjectBasePath / "p"sv, + "p"sv, + (ExportDir / "root").string(), + (ExportDir / "engine").string(), + (ExportDir / "game").string(), + (ExportDir / "game" / "game.uproject").string())); + + Ref<ProjectStore::Oplog> Oplog = LocalProject->NewOplog("oplog_partial_block", {}); + if (!Oplog) + { + return RemoteProjectStore::Result{.ErrorCode = -1}; + } + + // Six 512 KB chunks with OodleCompressionLevel::None so the compressed size stays large + // and the block genuinely exceeds the 512 KB slack threshold. + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage( + Oid::NewOid(), + CreateAttachments(std::initializer_list<size_t>{512u * 1024u, 512u * 1024u, 512u * 1024u, 512u * 1024u, 512u * 1024u, 512u * 1024u}, + OodleCompressionLevel::None))); + + // MaxChunkEmbedSize must be larger than the compressed size of each 512 KB chunk + // (OodleCompressionLevel::None → compressed ≈ raw ≈ 512 KB). With the legacy + // 32 KB limit all six chunks would become loose large attachments and no block would + // be created, so we use the production default of 1.5 MB instead. 
+ FileRemoteStoreOptions Options = {RemoteStoreOptions{.MaxBlockSize = 8u * 1024u * 1024u, + .MaxChunksPerBlock = 1000, + .MaxChunkEmbedSize = RemoteStoreOptions::DefaultMaxChunkEmbedSize, + .ChunkFileSizeLimit = 64u * 1024u * 1024u}, + /*.FolderPath =*/ExportDir, + /*.Name =*/std::string("oplog_partial_block"), + /*.OptionalBaseName =*/std::string(), + /*.ForceDisableBlocks =*/false, + /*.ForceEnableTempBlocks =*/false}; + OutRemoteStore = CreateFileRemoteStore(Log(), Options); + return SaveOplog(LocalCidStore, + *OutRemoteStore, + *LocalProject, + *Oplog, + NetworkPool, + WorkerPool, + Options.MaxBlockSize, + Options.MaxChunksPerBlock, + Options.MaxChunkEmbedSize, + Options.ChunkFileSizeLimit, + /*EmbedLooseFiles*/ true, + /*ForceUpload*/ false, + /*IgnoreMissingAttachments*/ false, + /*OptionalContext*/ nullptr); +} + +// Returns the first block hash that has at least MinChunkCount chunks, or a zero IoHash +// if no qualifying block exists in Store. +static IoHash +FindBlockWithMultipleChunks(RemoteProjectStore& Store, size_t MinChunkCount) +{ + RemoteProjectStore::LoadContainerResult ContainerResult = Store.LoadContainer(); + if (ContainerResult.ErrorCode != 0) + { + return {}; + } + std::vector<IoHash> BlockHashes = GetBlockHashesFromOplog(ContainerResult.ContainerObject); + if (BlockHashes.empty()) + { + return {}; + } + RemoteProjectStore::GetBlockDescriptionsResult Descriptions = Store.GetBlockDescriptions(BlockHashes, nullptr, Oid{}); + if (Descriptions.ErrorCode != 0) + { + return {}; + } + for (const ChunkBlockDescription& Desc : Descriptions.Blocks) + { + if (Desc.ChunkRawHashes.size() >= MinChunkCount) + { + return Desc.BlockHash; + } + } + return {}; +} + +// Loads BlockHash from Source and inserts every even-indexed chunk (0, 2, 4, …) into +// TargetCidStore. 
Odd-indexed chunks are left absent so that when an import is run +// against the same block, HasAttachment returns false for three non-adjacent positions +// — the minimum needed to exercise the multi-range partial-block download paths. +static void +SeedCidStoreWithAlternateChunks(CidStore& TargetCidStore, RemoteProjectStore& Source, const IoHash& BlockHash) +{ + RemoteProjectStore::LoadAttachmentResult BlockResult = Source.LoadAttachment(BlockHash); + if (BlockResult.ErrorCode != 0 || !BlockResult.Bytes) + { + return; + } + + IoHash RawHash; + uint64_t RawSize; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(BlockResult.Bytes), RawHash, RawSize); + if (!Compressed) + { + return; + } + CompositeBuffer BlockPayload = Compressed.DecompressToComposite(); + if (!BlockPayload) + { + return; + } + + uint32_t ChunkIndex = 0; + uint64_t HeaderSize = 0; + IterateChunkBlock( + BlockPayload.Flatten(), + [&TargetCidStore, &ChunkIndex](CompressedBuffer&& Chunk, const IoHash& AttachmentHash) { + if (ChunkIndex % 2 == 0) + { + IoBuffer ChunkData = Chunk.GetCompressed().Flatten().AsIoBuffer(); + TargetCidStore.AddChunk(ChunkData, AttachmentHash); + } + ++ChunkIndex; + }, + HeaderSize); +} + +TEST_CASE("project.store.import.context_settings") +{ + using namespace std::literals; + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + std::filesystem::path RootDir = TempDir.Path() / "root"; + std::filesystem::path EngineRootDir = TempDir.Path() / "engine"; + std::filesystem::path ProjectRootDir = TempDir.Path() / "game"; + std::filesystem::path ProjectFilePath = TempDir.Path() / "game" / "game.uproject"; + + // Export-side CAS and project store: used only by SetupExportStore to build the remote store + // payload. Kept separate from the import side so the two CAS instances are disjoint. 
+ GcManager ExportGc; + CidStore ExportCidStore(ExportGc); + CidStoreConfiguration ExportCidConfig = {.RootDirectory = TempDir.Path() / "export_cas", + .TinyValueThreshold = 1024, + .HugeValueThreshold = 4096}; + ExportCidStore.Initialize(ExportCidConfig); + + std::filesystem::path ExportBasePath = TempDir.Path() / "export_projectstore"; + ProjectStore ExportProjectStore(ExportCidStore, ExportBasePath, ExportGc, ProjectStore::Configuration{}); + Ref<ProjectStore::Project> ExportProject(ExportProjectStore.NewProject(ExportBasePath / "proj1"sv, + "proj1"sv, + RootDir.string(), + EngineRootDir.string(), + ProjectRootDir.string(), + ProjectFilePath.string())); + + uint32_t NetworkWorkerCount = Max(GetHardwareConcurrency() / 4u, 2u); + uint32_t WorkerCount = (NetworkWorkerCount < GetHardwareConcurrency()) ? Max(GetHardwareConcurrency() - NetworkWorkerCount, 4u) : 4u; + WorkerThreadPool WorkerPool(WorkerCount); + WorkerThreadPool NetworkPool(NetworkWorkerCount); + + std::shared_ptr<RemoteProjectStore> RemoteStore; + RemoteProjectStore::Result ExportResult = + SetupExportStore(ExportCidStore, *ExportProject, NetworkPool, WorkerPool, ExportDir.Path(), RemoteStore); + REQUIRE(ExportResult.ErrorCode == 0); + + // Import-side CAS and project store: starts empty, mirroring a fresh machine that has never + // downloaded the data. HasAttachment() therefore returns false for every chunk, so the import + // genuinely contacts the remote store without needing ForceDownload on the populate pass. 
+ GcManager ImportGc; + CidStore ImportCidStore(ImportGc); + CidStoreConfiguration ImportCidConfig = {.RootDirectory = TempDir.Path() / "import_cas", + .TinyValueThreshold = 1024, + .HugeValueThreshold = 4096}; + ImportCidStore.Initialize(ImportCidConfig); + + std::filesystem::path ImportBasePath = TempDir.Path() / "import_projectstore"; + ProjectStore ImportProjectStore(ImportCidStore, ImportBasePath, ImportGc, ProjectStore::Configuration{}); + Ref<ProjectStore::Project> ImportProject(ImportProjectStore.NewProject(ImportBasePath / "proj1"sv, + "proj1"sv, + RootDir.string(), + EngineRootDir.string(), + ProjectRootDir.string(), + ProjectFilePath.string())); + + const Oid CacheBuildId = Oid::NewOid(); + BuildStorageCache::Statistics CacheStats; + std::unique_ptr<BuildStorageCache> Cache = CreateInMemoryBuildStorageCache(256u, CacheStats); + auto ResetCacheStats = [&]() { + CacheStats.TotalBytesRead = 0; + CacheStats.TotalBytesWritten = 0; + CacheStats.TotalRequestCount = 0; + CacheStats.TotalRequestTimeUs = 0; + CacheStats.TotalExecutionTimeUs = 0; + CacheStats.PeakSentBytes = 0; + CacheStats.PeakReceivedBytes = 0; + CacheStats.PeakBytesPerSec = 0; + CacheStats.PutBlobCount = 0; + CacheStats.PutBlobByteCount = 0; + }; + + int OpJobIndex = 0; + + TestJobContext OpJobContext(OpJobIndex); + + // Helper: run a LoadOplog against the import-side CAS/project with the given context knobs. + // Each call creates a fresh oplog so repeated calls within one SUBCASE don't short-circuit on + // already-present data. 
+ auto DoImport = [&](BuildStorageCache* OptCache, + EPartialBlockRequestMode Mode, + double StoreLatency, + uint64_t StoreRanges, + double CacheLatency, + uint64_t CacheRanges, + bool PopulateCache, + bool ForceDownload) -> RemoteProjectStore::Result { + Ref<ProjectStore::Oplog> ImportOplog = ImportProject->NewOplog(fmt::format("import_{}", OpJobIndex++), {}); + return LoadOplog(LoadOplogContext{.ChunkStore = ImportCidStore, + .RemoteStore = *RemoteStore, + .OptionalCache = OptCache, + .CacheBuildId = CacheBuildId, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = ForceDownload, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = Mode, + .PopulateCache = PopulateCache, + .StoreLatencySec = StoreLatency, + .StoreMaxRangeCountPerRequest = StoreRanges, + .CacheLatencySec = CacheLatency, + .CacheMaxRangeCountPerRequest = CacheRanges, + .OptionalJobContext = &OpJobContext}); + }; + + // Shorthand: Mode=All, low latency, 128 ranges for both store and cache. + auto ImportAll = [&](BuildStorageCache* OptCache, bool Populate, bool Force) { + return DoImport(OptCache, EPartialBlockRequestMode::All, 0.001, 128u, 0.001, 128u, Populate, Force); + }; + + SUBCASE("mode_off_no_cache") + { + // Baseline: no partial block requests, no cache. + RemoteProjectStore::Result R = + DoImport(nullptr, EPartialBlockRequestMode::Off, -1.0, (uint64_t)-1, -1.0, (uint64_t)-1, false, false); + CHECK(R.ErrorCode == 0); + } + + SUBCASE("mode_all_multirange_cloud_no_cache") + { + // StoreMaxRangeCountPerRequest > 1 → MultiRange cloud path. + RemoteProjectStore::Result R = DoImport(nullptr, EPartialBlockRequestMode::All, 0.001, 128u, -1.0, 0u, false, false); + CHECK(R.ErrorCode == 0); + } + + SUBCASE("mode_all_singlerange_cloud_no_cache") + { + // StoreMaxRangeCountPerRequest == 1 → SingleRange cloud path. 
+ RemoteProjectStore::Result R = DoImport(nullptr, EPartialBlockRequestMode::All, 0.001, 1u, -1.0, 0u, false, false); + CHECK(R.ErrorCode == 0); + } + + SUBCASE("mode_mixed_high_latency_no_cache") + { + // High store latency encourages range merging; Mixed uses SingleRange for cloud, Off for cache. + RemoteProjectStore::Result R = DoImport(nullptr, EPartialBlockRequestMode::Mixed, 0.1, 128u, -1.0, 0u, false, false); + CHECK(R.ErrorCode == 0); + } + + SUBCASE("cache_populate_and_hit") + { + // First import: ImportCidStore is empty so all blocks are downloaded from the remote store + // and written to the cache. + RemoteProjectStore::Result PopulateResult = ImportAll(Cache.get(), /*PopulateCache=*/true, /*Force=*/false); + CHECK(PopulateResult.ErrorCode == 0); + CHECK(CacheStats.PutBlobCount > 0); + + // Re-import with ForceDownload=true: all chunks are now in ImportCidStore but Force overrides + // HasAttachment() so the download logic re-runs and serves blocks from the cache instead of + // the remote store. + ResetCacheStats(); + RemoteProjectStore::Result HitResult = ImportAll(Cache.get(), /*PopulateCache=*/false, /*Force=*/true); + CHECK(HitResult.ErrorCode == 0); + CHECK(CacheStats.PutBlobCount == 0); + // TotalRequestCount covers both full-blob cache hits and partial-range cache hits. + CHECK(CacheStats.TotalRequestCount > 0); + } + + SUBCASE("cache_no_populate_flag") + { + // Cache is provided but PopulateCache=false: blocks are downloaded to ImportCidStore but + // nothing should be written to the cache. + RemoteProjectStore::Result R = ImportAll(Cache.get(), /*PopulateCache=*/false, /*Force=*/false); + CHECK(R.ErrorCode == 0); + CHECK(CacheStats.PutBlobCount == 0); + } + + SUBCASE("mode_zencacheonly_cache_multirange") + { + // Pre-populate the cache via a plain import, then re-import with ZenCacheOnly + + // CacheMaxRangeCountPerRequest=128. 
With 100% of chunks needed, all blocks go to + // FullBlockIndexes and GetBuildBlob (full blob) is called from the cache. + // CacheMaxRangeCountPerRequest > 1 would route partial downloads through GetBuildBlobRanges + // if the analyser ever emits BlockRanges entries. + RemoteProjectStore::Result Populate = ImportAll(Cache.get(), /*PopulateCache=*/true, /*Force=*/false); + CHECK(Populate.ErrorCode == 0); + ResetCacheStats(); + + RemoteProjectStore::Result R = DoImport(Cache.get(), EPartialBlockRequestMode::ZenCacheOnly, 0.1, 128u, 0.001, 128u, false, true); + CHECK(R.ErrorCode == 0); + CHECK(CacheStats.TotalRequestCount > 0); + } + + SUBCASE("mode_zencacheonly_cache_singlerange") + { + // Pre-populate the cache, then re-import with ZenCacheOnly + CacheMaxRangeCountPerRequest=1. + // With 100% of chunks needed the analyser sends all blocks to FullBlockIndexes (full-block + // download path), which calls GetBuildBlob with no range offset — a full-blob cache hit. + // The single-range vs multi-range distinction only matters for the partial-block (BlockRanges) + // path, which is not reached when all chunks are needed. + RemoteProjectStore::Result Populate = ImportAll(Cache.get(), /*PopulateCache=*/true, /*Force=*/false); + CHECK(Populate.ErrorCode == 0); + ResetCacheStats(); + + RemoteProjectStore::Result R = DoImport(Cache.get(), EPartialBlockRequestMode::ZenCacheOnly, 0.1, 128u, 0.001, 1u, false, true); + CHECK(R.ErrorCode == 0); + CHECK(CacheStats.TotalRequestCount > 0); + } + + SUBCASE("mode_all_cache_and_cloud_multirange") + { + // Pre-populate cache; All mode uses multi-range for both the cache and cloud paths. 
+ RemoteProjectStore::Result Populate = ImportAll(Cache.get(), /*PopulateCache=*/true, /*Force=*/false); + CHECK(Populate.ErrorCode == 0); + ResetCacheStats(); + + RemoteProjectStore::Result R = ImportAll(Cache.get(), /*PopulateCache=*/false, /*Force=*/true); + CHECK(R.ErrorCode == 0); + CHECK(CacheStats.TotalRequestCount > 0); + } + + SUBCASE("partial_block_cloud_multirange") + { + // Export store with 6 × 512 KB chunks packed into one ~3 MB block. + ScopedTemporaryDirectory PartialExportDir; + std::shared_ptr<RemoteProjectStore> PartialRemoteStore; + RemoteProjectStore::Result ExportR = + SetupPartialBlockExportStore(NetworkPool, WorkerPool, PartialExportDir.Path(), PartialRemoteStore); + REQUIRE(ExportR.ErrorCode == 0); + + // Seeding even-indexed chunks (0, 2, 4) leaves odd ones (1, 3, 5) absent in + // ImportCidStore. Three non-adjacent needed positions → three BlockRangeDescriptors. + IoHash BlockHash = FindBlockWithMultipleChunks(*PartialRemoteStore, 4u); + CHECK(BlockHash != IoHash::Zero); + SeedCidStoreWithAlternateChunks(ImportCidStore, *PartialRemoteStore, BlockHash); + + // StoreMaxRangeCountPerRequest=128 → all three ranges sent in one LoadAttachmentRanges call. 
+ Ref<ProjectStore::Oplog> PartialOplog = ImportProject->NewOplog(fmt::format("partial_cloud_multi_{}", OpJobIndex++), {}); + RemoteProjectStore::Result R = LoadOplog(LoadOplogContext{.ChunkStore = ImportCidStore, + .RemoteStore = *PartialRemoteStore, + .OptionalCache = nullptr, + .CacheBuildId = CacheBuildId, + .Oplog = *PartialOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = false, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::All, + .PopulateCache = false, + .StoreLatencySec = 0.001, + .StoreMaxRangeCountPerRequest = 128u, + .CacheLatencySec = -1.0, + .CacheMaxRangeCountPerRequest = 0u, + .OptionalJobContext = &OpJobContext}); + CHECK(R.ErrorCode == 0); + } + + SUBCASE("partial_block_cloud_singlerange") + { + // Same block layout as partial_block_cloud_multirange but StoreMaxRangeCountPerRequest=1. + // DownloadPartialBlock issues one LoadAttachmentRanges call per range. + ScopedTemporaryDirectory PartialExportDir; + std::shared_ptr<RemoteProjectStore> PartialRemoteStore; + RemoteProjectStore::Result ExportR = + SetupPartialBlockExportStore(NetworkPool, WorkerPool, PartialExportDir.Path(), PartialRemoteStore); + REQUIRE(ExportR.ErrorCode == 0); + + IoHash BlockHash = FindBlockWithMultipleChunks(*PartialRemoteStore, 4u); + CHECK(BlockHash != IoHash::Zero); + SeedCidStoreWithAlternateChunks(ImportCidStore, *PartialRemoteStore, BlockHash); + + Ref<ProjectStore::Oplog> PartialOplog = ImportProject->NewOplog(fmt::format("partial_cloud_single_{}", OpJobIndex++), {}); + RemoteProjectStore::Result R = LoadOplog(LoadOplogContext{.ChunkStore = ImportCidStore, + .RemoteStore = *PartialRemoteStore, + .OptionalCache = nullptr, + .CacheBuildId = CacheBuildId, + .Oplog = *PartialOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = false, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = 
EPartialBlockRequestMode::All, + .PopulateCache = false, + .StoreLatencySec = 0.001, + .StoreMaxRangeCountPerRequest = 1u, + .CacheLatencySec = -1.0, + .CacheMaxRangeCountPerRequest = 0u, + .OptionalJobContext = &OpJobContext}); + CHECK(R.ErrorCode == 0); + } + + SUBCASE("partial_block_cache_multirange") + { + ScopedTemporaryDirectory PartialExportDir; + std::shared_ptr<RemoteProjectStore> PartialRemoteStore; + RemoteProjectStore::Result ExportR = + SetupPartialBlockExportStore(NetworkPool, WorkerPool, PartialExportDir.Path(), PartialRemoteStore); + REQUIRE(ExportR.ErrorCode == 0); + + IoHash BlockHash = FindBlockWithMultipleChunks(*PartialRemoteStore, 4u); + CHECK(BlockHash != IoHash::Zero); + + // Phase 1: ImportCidStore starts empty → full block download from remote → PutBuildBlob + // populates the cache. + { + Ref<ProjectStore::Oplog> Phase1Oplog = ImportProject->NewOplog(fmt::format("partial_cache_multi_p1_{}", OpJobIndex++), {}); + RemoteProjectStore::Result Phase1R = LoadOplog(LoadOplogContext{.ChunkStore = ImportCidStore, + .RemoteStore = *PartialRemoteStore, + .OptionalCache = Cache.get(), + .CacheBuildId = CacheBuildId, + .Oplog = *Phase1Oplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = false, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::All, + .PopulateCache = true, + .StoreLatencySec = 0.001, + .StoreMaxRangeCountPerRequest = 128u, + .CacheLatencySec = 0.001, + .CacheMaxRangeCountPerRequest = 128u, + .OptionalJobContext = &OpJobContext}); + CHECK(Phase1R.ErrorCode == 0); + CHECK(CacheStats.PutBlobCount > 0); + } + ResetCacheStats(); + + // Phase 2: fresh CidStore with only even-indexed chunks seeded. + // HasAttachment returns false for odd chunks (1, 3, 5) → three BlockRangeDescriptors. + // Block is in cache from Phase 1 → cache partial path. + // CacheMaxRangeCountPerRequest=128 → SubRangeCount=3 > 1 → GetBuildBlobRanges. 
+ GcManager Phase2Gc; + CidStore Phase2CidStore(Phase2Gc); + CidStoreConfiguration Phase2CidConfig = {.RootDirectory = TempDir.Path() / "partial_cas", + .TinyValueThreshold = 1024, + .HugeValueThreshold = 4096}; + Phase2CidStore.Initialize(Phase2CidConfig); + SeedCidStoreWithAlternateChunks(Phase2CidStore, *PartialRemoteStore, BlockHash); + + Ref<ProjectStore::Oplog> Phase2Oplog = ImportProject->NewOplog(fmt::format("partial_cache_multi_p2_{}", OpJobIndex++), {}); + RemoteProjectStore::Result Phase2R = LoadOplog(LoadOplogContext{.ChunkStore = Phase2CidStore, + .RemoteStore = *PartialRemoteStore, + .OptionalCache = Cache.get(), + .CacheBuildId = CacheBuildId, + .Oplog = *Phase2Oplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = false, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::ZenCacheOnly, + .PopulateCache = false, + .StoreLatencySec = 0.001, + .StoreMaxRangeCountPerRequest = 128u, + .CacheLatencySec = 0.001, + .CacheMaxRangeCountPerRequest = 128u, + .OptionalJobContext = &OpJobContext}); + CHECK(Phase2R.ErrorCode == 0); + CHECK(CacheStats.TotalRequestCount > 0); + } + + SUBCASE("partial_block_cache_singlerange") + { + ScopedTemporaryDirectory PartialExportDir; + std::shared_ptr<RemoteProjectStore> PartialRemoteStore; + RemoteProjectStore::Result ExportR = + SetupPartialBlockExportStore(NetworkPool, WorkerPool, PartialExportDir.Path(), PartialRemoteStore); + REQUIRE(ExportR.ErrorCode == 0); + + IoHash BlockHash = FindBlockWithMultipleChunks(*PartialRemoteStore, 4u); + CHECK(BlockHash != IoHash::Zero); + + // Phase 1: full block download from remote into cache. 
+ { + Ref<ProjectStore::Oplog> Phase1Oplog = ImportProject->NewOplog(fmt::format("partial_cache_single_p1_{}", OpJobIndex++), {}); + RemoteProjectStore::Result Phase1R = LoadOplog(LoadOplogContext{.ChunkStore = ImportCidStore, + .RemoteStore = *PartialRemoteStore, + .OptionalCache = Cache.get(), + .CacheBuildId = CacheBuildId, + .Oplog = *Phase1Oplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = false, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::All, + .PopulateCache = true, + .StoreLatencySec = 0.001, + .StoreMaxRangeCountPerRequest = 128u, + .CacheLatencySec = 0.001, + .CacheMaxRangeCountPerRequest = 128u, + .OptionalJobContext = &OpJobContext}); + CHECK(Phase1R.ErrorCode == 0); + CHECK(CacheStats.PutBlobCount > 0); + } + ResetCacheStats(); + + // Phase 2: fresh CidStore with only even-indexed chunks seeded. + // CacheMaxRangeCountPerRequest=1 → SubRangeCount=Min(3,1)=1 → GetBuildBlob with range + // offset (single-range legacy cache path), called once per needed chunk range. 
+ GcManager Phase2Gc; + CidStore Phase2CidStore(Phase2Gc); + CidStoreConfiguration Phase2CidConfig = {.RootDirectory = TempDir.Path() / "partial_cas_single", + .TinyValueThreshold = 1024, + .HugeValueThreshold = 4096}; + Phase2CidStore.Initialize(Phase2CidConfig); + SeedCidStoreWithAlternateChunks(Phase2CidStore, *PartialRemoteStore, BlockHash); + + Ref<ProjectStore::Oplog> Phase2Oplog = ImportProject->NewOplog(fmt::format("partial_cache_single_p2_{}", OpJobIndex++), {}); + RemoteProjectStore::Result Phase2R = LoadOplog(LoadOplogContext{.ChunkStore = Phase2CidStore, + .RemoteStore = *PartialRemoteStore, + .OptionalCache = Cache.get(), + .CacheBuildId = CacheBuildId, + .Oplog = *Phase2Oplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = false, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::ZenCacheOnly, + .PopulateCache = false, + .StoreLatencySec = 0.001, + .StoreMaxRangeCountPerRequest = 128u, + .CacheLatencySec = 0.001, + .CacheMaxRangeCountPerRequest = 1u, + .OptionalJobContext = &OpJobContext}); + CHECK(Phase2R.ErrorCode == 0); + CHECK(CacheStats.TotalRequestCount > 0); + } +} + +TEST_SUITE_END(); + #endif // ZEN_WITH_TESTS void diff --git a/src/zenremotestore/projectstore/zenremoteprojectstore.cpp b/src/zenremotestore/projectstore/zenremoteprojectstore.cpp index ab82edbef..115d6438d 100644 --- a/src/zenremotestore/projectstore/zenremoteprojectstore.cpp +++ b/src/zenremotestore/projectstore/zenremoteprojectstore.cpp @@ -159,7 +159,8 @@ public: virtual LoadAttachmentsResult LoadAttachments(const std::vector<IoHash>& RawHashes) override { - std::string LoadRequest = fmt::format("/{}/oplog/{}/rpc"sv, m_Project, m_Oplog); + LoadAttachmentsResult Result; + std::string LoadRequest = fmt::format("/{}/oplog/{}/rpc"sv, m_Project, m_Oplog); CbObject Request; { @@ -187,7 +188,7 @@ public: HttpClient::Response Response = m_Client.Post(LoadRequest, Request, 
HttpClient::Accept(ZenContentType::kCbPackage)); AddStats(Response); - LoadAttachmentsResult Result = LoadAttachmentsResult{ConvertResult(Response)}; + Result = LoadAttachmentsResult{ConvertResult(Response)}; if (Result.ErrorCode) { Result.Reason = fmt::format("Failed fetching {} oplog attachments from {}/{}/{}. Reason: '{}'", @@ -249,20 +250,49 @@ public: return GetKnownBlocksResult{{.ErrorCode = static_cast<int>(HttpResponseCode::NoContent)}}; } + virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes, + BuildStorageCache* OptionalCache, + const Oid& CacheBuildId) override + { + ZEN_UNUSED(BlockHashes, OptionalCache, CacheBuildId); + return GetBlockDescriptionsResult{Result{.ErrorCode = int(HttpResponseCode::NotFound)}}; + } + virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash) override { + LoadAttachmentResult Result; std::string LoadRequest = fmt::format("/{}/oplog/{}/{}"sv, m_Project, m_Oplog, RawHash); HttpClient::Response Response = m_Client.Download(LoadRequest, m_TempFilePath, HttpClient::Accept(ZenContentType::kCompressedBinary)); AddStats(Response); - LoadAttachmentResult Result = LoadAttachmentResult{ConvertResult(Response)}; - if (!Result.ErrorCode) + Result = LoadAttachmentResult{ConvertResult(Response)}; + if (Result.ErrorCode) { - Result.Bytes = Response.ResponsePayload; - Result.Bytes.MakeOwned(); + Result.Reason = fmt::format("Failed fetching oplog attachment from {}/{}/{}/{}. 
Reason: '{}'", + m_ProjectStoreUrl, + m_Project, + m_Oplog, + RawHash, + Result.Reason); } - if (!Result.ErrorCode) + Result.Bytes = Response.ResponsePayload; + Result.Bytes.MakeOwned(); + return Result; + } + + virtual LoadAttachmentRangesResult LoadAttachmentRanges(const IoHash& RawHash, + std::span<const std::pair<uint64_t, uint64_t>> Ranges) override + { + ZEN_ASSERT(!Ranges.empty()); + LoadAttachmentRangesResult Result; + std::string LoadRequest = fmt::format("/{}/oplog/{}/{}"sv, m_Project, m_Oplog, RawHash); + HttpClient::Response Response = + m_Client.Download(LoadRequest, m_TempFilePath, HttpClient::Accept(ZenContentType::kCompressedBinary)); + AddStats(Response); + + Result = LoadAttachmentRangesResult{ConvertResult(Response)}; + if (Result.ErrorCode) { Result.Reason = fmt::format("Failed fetching oplog attachment from {}/{}/{}/{}. Reason: '{}'", m_ProjectStoreUrl, @@ -271,11 +301,13 @@ public: RawHash, Result.Reason); } + else + { + Result.Ranges = std::vector<std::pair<uint64_t, uint64_t>>(Ranges.begin(), Ranges.end()); + } return Result; } - virtual void Flush() override {} - private: void AddStats(const HttpClient::Response& Result) { |